def get_video_files(argvs):
    try:
        database = MetaDataDB.from_file(argvs[1])
    except Exception:
        print("Invalid database file")
        return None, None, None

    # now search for the specified lecture
    lecture_name = argvs[2].lower()
    current_lecture = None
    for lecture in database.lectures:
        if lecture.title.lower() == lecture_name:
            current_lecture = lecture
            break

    if current_lecture is None:
        print("Lecture not found in database")
        print("Available lectures:")
        for lecture in database.lectures:
            print(lecture.title)
        return None, None, None

    m_videos = [video["path"] for video in current_lecture.main_videos]

    return m_videos, database, current_lecture
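# A minimal usage sketch (not part of the original module), assuming the
# enclosing script already imports sys: argvs[1] is the database file and
# argvs[2] is the lecture title, e.g.
#     python some_script.py database.xml lecture_01
# (the script and file names here are hypothetical)
if __name__ == "__main__":
    m_videos, database, current_lecture = get_video_files(sys.argv)
    if m_videos is not None:
        for video_path in m_videos:
            print(video_path)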
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [dataset]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tdataset:\tDataset to run (Default= Training)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    video_metadata_dir = output_dir + "/" + config.get_str("SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    os.makedirs(video_metadata_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    for current_lecture in testing_set:
        print("")
        print("processing: " + current_lecture.title)

        # the simple frame sampling worker ..
        worker = SimpleFrameSampler()

        # main video file names
        m_videos = [config.get_str("VIDEO_FILES_PATH") + "/" + video["path"]
                    for video in current_lecture.main_videos]

        video_info = {}
        if "forced_width" in current_lecture.parameters:
            # the lecture metadata forces a resolution ...
            video_info["width"] = current_lecture.parameters["forced_width"]
            video_info["height"] = current_lecture.parameters["forced_height"]
        else:
            # execute the actual process to read the resolution from the video ....
            processor = SequentialVideoSampler(m_videos, [0])
            processor.doProcessing(worker, 0, True)  # 0

            video_info["width"] = worker.width
            video_info["height"] = worker.height

        output_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(video_info, output_filename)
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    normalization_bone = config.get("SPEAKER_NORMALIZATION_BONE")  # pair of normalization factor points

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")
    # the per-lecture OpenPose CSV
    lecture_filename_prefix = output_dir + "/" + config.get_str("OPENPOSE_OUTPUT_DIR_CSV") + "/" + database.name + "_"

    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    os.makedirs(output_segment_dir, exist_ok=True)

    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH")

    for lecture in testing_set:
        lecture_filename = lecture_filename_prefix + lecture.title + ".csv"
        print("Loading data for: " + lecture_filename)

        # get the corresponding data for this lecture ...
        lec_segments, lecture_data = LecturePoseSegments.InitializeFromLectureFile(lecture_filename,
                                                                                   normalization_bone,
                                                                                   remove_confidence)

        # sequential sampling of pose segments
        vid_len = lecture_data.shape[0]
        for ind in range(int(vid_len / segment_length)):
            f_start = ind * segment_length
            f_end = f_start + segment_length - 1

            temp_data = lecture_data[f_start:f_end + 1, :]
            lec_segments.segments.append(PoseSegmentData(f_start, f_end, None, temp_data))

        # save ....
        output_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        MiscHelper.dump_save(lec_segments, output_filename)
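# A minimal sketch (not part of the original script) of the fixed-length
# segmentation arithmetic above: a video with vid_len frames is split into
# floor(vid_len / segment_length) non-overlapping segments; trailing frames
# that do not fill a whole segment are dropped.
vid_len, segment_length = 100, 15
ranges = [(ind * segment_length, ind * segment_length + segment_length - 1)
          for ind in range(vid_len // segment_length)]
print(ranges)  # [(0, 14), (15, 29), (30, 44), (45, 59), (60, 74), (75, 89)]
# frames 90..99 are left out because they do not complete a segment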
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    classifier_dir = output_dir + "/" + config.get_str("SPEAKER_ACTION_CLASSIFIER_DIR")
    os.makedirs(classifier_dir, exist_ok=True)
    classifier_filename = classifier_dir + "/" + config.get_str("SPEAKER_ACTION_CLASSIFIER_FILENAME")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]
    training_titles = [lecture.title.lower() for lecture in training_set]

    # get classifier parameters
    rf_n_trees = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_TREES", 64)
    rf_depth = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_DEPTH", 16)

    # read all available training data ....
    train_dataset = {}
    for lecture in training_set:
        input_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        train_dataset[lecture.title.lower()] = MiscHelper.dump_load(input_filename)

    train_x, train_y, train_frame_infos = PoseFeatureExtractor.combine_datasets(training_titles, train_dataset)

    # train the classifier and save it ....
    clf = RandomForestClassifier(n_estimators=rf_n_trees, max_depth=rf_depth, random_state=0)
    clf = clf.fit(train_x, train_y)
    MiscHelper.dump_save(clf, classifier_filename)
def prepare(args):
    # load the database
    try:
        database = MetaDataDB.from_file(args['database'])
    except Exception:
        print("Invalid AccessMath database file")
        return

    TangentV_Helper.VisualizerServer = database.indexing.visualization_server

    # ... load 3D structures ....
    print("Loading CC indices per lecture ... ")
    for lecture in database.lectures:
        struct_filename = database.output_temporal + '/' + Parameters.Output_ST3D + str(lecture.id) + ".dat"
        TangentV_Helper.cache_3DSTs[lecture.title] = MiscHelper.dump_load(struct_filename)
def initialize(self):
    # load database info
    try:
        self.database = MetaDataDB.from_file(self.database_file)
    except Exception:
        print("Invalid database file")
        return False

    self.params = MiscHelper.optional_parameters(self.raw_params, 0)

    # process the specified dataset(s)
    if "d" in self.params:
        if not isinstance(self.params["d"], list):
            self.params["d"] = [self.params["d"]]

        valid_datasets = []
        for name in self.params["d"]:
            dataset = self.database.get_dataset(name)
            if dataset is None:
                print("Invalid Dataset name <" + name + ">")
                return False
            else:
                valid_datasets.append(dataset)

        self.params["d"] = valid_datasets

    # process only the specified lectures
    if "l" in self.params:
        if not isinstance(self.params["l"], list):
            self.params["l"] = [self.params["l"]]

        self.params["l"] = [name.lower() for name in self.params["l"]]

    # override the input prefix
    if "i" in self.params:
        self.input_temp_prefix = self.params["i"]

    # override the output prefix
    if "o" in self.params:
        self.output_temp_prefix = self.params["o"]

    self.temp_dir = self.database.output_temporal
    self.out_dir = self.database.output_preprocessed
    self.img_dir = self.database.output_images

    # database file loaded successfully ..
    return True
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]

    # prepare the feature extractor ...
    feature_points = config.get("SPEAKER_ACTION_FEATURE_POINTS")
    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH", 15)
    feat_extractor = PoseFeatureExtractor(feature_points, segment_length)

    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    os.makedirs(features_dir, exist_ok=True)

    # for each file ... get features ...
    for lecture in training_set:
        input_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        output_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"

        lecture_pose_segments = MiscHelper.dump_load(input_filename)
        vid_data = feat_extractor.get_feature_dataset(lecture_pose_segments)
        MiscHelper.dump_save(vid_data, output_filename)
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("\n\tWhere:")
        print("\tgt_labels:\t(Optional) Set to 1 to use Ground Truth labels instead of predictions")
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]
    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    output_bboxes_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    os.makedirs(output_bboxes_dir, exist_ok=True)

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    speaker_right_handed = config.get("SPEAKER_IS_RIGHT_HANDED")

    n_joints_body = 25
    n_joints_hand = 21

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    col_name = ['frame_id', ('ground_truth' if use_ground_truth else 'pred_label'),
                'body_xmin', 'body_xmax', 'body_ymin', 'body_ymax',
                'rh_xmin', 'rh_xmax', 'rh_ymin', 'rh_ymax']

    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH")

    # load data + labels
    for lecture in testing_set:
        input_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        lec_segments = MiscHelper.dump_load(input_filename)

        if use_ground_truth:
            labels = lec_segments.get_all_labels()
        else:
            input_proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + lecture.title + ".csv"
            _, _, labels, _ = ResultReader.read_actions_probabilities_file(input_proba_filename, valid_actions)

        output_filename = output_bboxes_dir + "/" + database.name + "_" + lecture.title + ".csv"
        output_file = ResultRecorder(output_filename)
        output_file.write_headers(col_name)

        # get the bounding boxes for the skeleton and the right hand from all segments
        frames = []
        segment_labels = []
        body_bbox = []
        rh_bbox = []
        for ind in range(len(lec_segments.segments)):
            # get the pose data ...
            if not remove_confidence:
                # the data contains confidence values ... which need to be removed at this point ...
                base_pose_data = lec_segments.segments[ind].pose_data
                total_joints = n_joints_body + n_joints_hand * 2
                seg_pose_data = np.zeros((base_pose_data.shape[0], total_joints * 2), dtype=base_pose_data.dtype)
                seg_pose_data[:, ::2] = base_pose_data[:, ::3]
                seg_pose_data[:, 1::2] = base_pose_data[:, 1::3]
            else:
                # confidence has already been removed ....
                seg_pose_data = lec_segments.segments[ind].pose_data

            body_features = seg_pose_data[:, 0:n_joints_body * 2]
            if speaker_right_handed:
                # get right hand data
                rh_features = seg_pose_data[:, (n_joints_body + n_joints_hand) * 2:]
            else:
                # use left hand data
                rh_features = seg_pose_data[:, n_joints_body * 2:(n_joints_body + n_joints_hand) * 2]

            # get body bboxes and add them to the list ....
            temp_body_bbox = PoseSegmentData.get_bbox_frame_data(body_features, 2)
            body_bbox += temp_body_bbox.tolist()

            # get hand bboxes and add them to the list ....
            temp_rh_bbox = PoseSegmentData.get_bbox_frame_data(rh_features, 2)
            rh_bbox += temp_rh_bbox.tolist()

            # add the frame range ....
            f_start = lec_segments.segments[ind].frame_start
            f_end = lec_segments.segments[ind].frame_end
            temp_frames = list(range(f_start, f_end + 1))
            frames += temp_frames

            # add the segment label, repeated once per frame ....
            temp_label = [[labels[ind]] for _ in range(segment_length)]
            segment_labels += temp_label

        paras = frames, segment_labels, body_bbox, rh_bbox
        output_file.record_results(paras)
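# A minimal sketch (independent of the script above) of the interleaved-slicing
# trick used to drop the OpenPose confidence values: the raw per-frame layout is
# (x0, y0, c0, x1, y1, c1, ...) and the target layout is (x0, y0, x1, y1, ...).
import numpy as np

base = np.array([[10., 20., 0.9, 30., 40., 0.8]])   # 1 frame, 2 joints with confidence
stripped = np.zeros((base.shape[0], 4), dtype=base.dtype)
stripped[:, ::2] = base[:, ::3]    # copy every x (source columns 0, 3, ...)
stripped[:, 1::2] = base[:, 1::3]  # copy every y (source columns 1, 4, ...)
print(stripped)                    # [[10. 20. 30. 40.]]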
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    output_bboxes_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    video_metadata_dir = output_dir + "/" + config.get_str("SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str("SPEAKER_FG_ESTIMATION_MASK_DIR")
    os.makedirs(fg_mask_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    speaker_exp_factor = config.get_float("SPEAKER_FG_ESTIMATION_SPK_EXPANSION_FACTOR")
    min_mask_frames = config.get_int("SPEAKER_FG_ESTIMATION_MIN_MASK_FRAMES")
    mask_exp_radius = config.get_int("SPEAKER_FG_ESTIMATION_MASK_EXPANSION_RADIUS")

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    for current_lecture in testing_set:
        bbox_filename = output_bboxes_dir + "/" + database.name + "_" + current_lecture.title + ".csv"
        frame_idxs, actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(bbox_filename, use_ground_truth)

        info_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_info = MiscHelper.dump_load(info_filename)

        fg_estimator = ForegroundEstimator(video_info["width"], video_info["height"],
                                           speaker_exp_factor, min_mask_frames, mask_exp_radius)

        fg_mask = fg_estimator.get_mask(frame_idxs, actions, body_bboxes, rh_bboxes)

        # cv2.imshow(current_lecture.id, fg_mask)
        # cv2.waitKey()

        # store the mask as a PNG-compressed buffer ....
        flag, raw_data = cv2.imencode(".png", fg_mask)
        output_filename = fg_mask_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(raw_data, output_filename)
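# A minimal sketch (independent of the script above) of the PNG round trip used
# for the foreground masks: cv2.imencode compresses the mask into a byte buffer
# that can be pickled, and cv2.imdecode (used by a later script) restores it.
import cv2
import numpy as np

mask = np.zeros((4, 4), dtype=np.uint8)
mask[1:3, 1:3] = 255

flag, raw_png = cv2.imencode(".png", mask)           # flag is True on success
restored = cv2.imdecode(raw_png, cv2.IMREAD_GRAYSCALE)
assert np.array_equal(mask, restored)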
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    action_class_output_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    os.makedirs(action_class_output_dir, exist_ok=True)
    os.makedirs(action_class_probabilities_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    # load the saved model for action classification ...
    classifier_dir = output_dir + "/" + config.get_str("SPEAKER_ACTION_CLASSIFIER_DIR")
    classifier_filename = classifier_dir + "/" + config.get_str("SPEAKER_ACTION_CLASSIFIER_FILENAME")
    clf = MiscHelper.dump_load(classifier_filename)

    csv_col = ['frame_start', 'frame_end', 'prediction']

    for lecture in testing_set:
        input_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        output_actions_filename = action_class_output_dir + "/" + database.name + "_" + lecture.title + ".csv"
        output_proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + lecture.title + ".csv"

        # load data ...
        data_xy = MiscHelper.dump_load(input_filename)

        # classifier prediction ....
        test_x = data_xy["features"]
        y_pred = clf.predict(test_x)
        y_pred_re = y_pred.reshape((y_pred.shape[0], 1))

        # save the prediction results
        output_csv = ResultRecorder(output_actions_filename)
        output_csv.write_headers(csv_col)
        # the function accepts a list of columns to save on the CSV ...
        # by transposing, we turn the standard list of rows into a list of columns for the function ...
        paras = np.hstack((data_xy["frame_infos"], y_pred[:, None])).transpose()
        output_csv.record_results(paras)

        # save the label probabilities
        all_classes = clf.classes_
        y_prob = clf.predict_proba(test_x)
        infos = np.concatenate((y_pred_re, y_prob), axis=1)

        output_csv = ResultRecorder(output_proba_filename)
        output_csv.write_headers(csv_col + all_classes.tolist())
        # ... idem ....
        paras = np.hstack((data_xy["frame_infos"], infos)).transpose()
        output_csv.record_results(paras)
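# A minimal numpy sketch (independent of the script above) of the hstack +
# transpose step: per-row records (frame_start, frame_end) are joined with a
# prediction column, then transposed into one array per CSV column.
import numpy as np

frame_infos = np.array([[0, 14], [15, 29]])
y_pred = np.array([1, 0])

rows = np.hstack((frame_infos, y_pred[:, None]))  # shape (2, 3): one row per segment
cols = rows.transpose()                           # shape (3, 2): one array per CSV column
print(cols[0])  # frame_start column: [ 0 15]
print(cols[2])  # prediction column:  [1 0]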
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    export_prefix = output_dir + "/" + database.output_annotations + "/" + database.name + "_"

    action_object_name = config.get_str("SPEAKER_ACTION_MAIN_OBJECT", "speaker")
    action_segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH", 15)
    action_segment_sampling = config.get_int("SPEAKER_ACTION_SEGMENT_SAMPLING_MODE", 2)  # sampling MODE!!
    action_segment_tracks = config.get_int("SPEAKER_ACTION_SEGMENT_SAMPLING_TRACKS", 4)
    action_segment_output_dir = config.get_str("SPEAKER_ACTION_SEGMENT_OUTPUT_DIR", ".")

    segments_output_prefix = output_dir + "/" + action_segment_output_dir + "/" + database.name + "_"
    os.makedirs(output_dir + "/" + action_segment_output_dir, exist_ok=True)

    sampler = ActionSegmentSampling(action_segment_sampling, action_segment_length, action_segment_tracks)

    # for each dataset ...
    for dataset_name in database.datasets:
        print("Processing data set: " + dataset_name)

        # get segments ...
        all_dataset_segments = []
        for current_lecture in database.datasets[dataset_name]:
            exported_data_filename = export_prefix + current_lecture.title.lower() + "_" + action_object_name + ".csv"
            print(" - input file: " + exported_data_filename)
            if not os.path.exists(exported_data_filename):
                print("\tWARNING: File not found!")
                continue

            # call the sampler here ....
            lecture_title = current_lecture.title.lower()
            lecture_segments = sampler.sample_from_file(exported_data_filename, lecture_title)
            all_dataset_segments += lecture_segments

        # prepare the text lines ...
        output_lines = ["lecture_title,frame_start,frame_end,action\n"]
        for segment in all_dataset_segments:
            output_lines.append(",".join([str(value) for value in segment]) + "\n")

        # save the segments for this dataset ....
        output_filename = segments_output_prefix + dataset_name + "_" + action_object_name + ".csv"
        with open(output_filename, "w") as out_file:
            out_file.writelines(output_lines)

        print(" - data saved to: " + output_filename)

    print("Process complete!")
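# A small illustration (with made-up lecture names and action labels) of the
# CSV produced above: one header line followed by one row per sampled segment.
sample_lines = [
    "lecture_title,frame_start,frame_end,action\n",
    "lecture_01,0,14,writing\n",
    "lecture_01,15,29,explaining\n",
]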
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get("SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get("SPEAKER_ACTION_KEYFRAMES_DIR")
    os.makedirs(keyframes_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    for current_lecture in testing_set:
        # read the segment data ....
        input_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_segment_data = MiscHelper.dump_load(input_filename)

        # key-frames that must be extracted from the video ...
        segments, keyframes_per_segment = video_segment_data
        all_keyframes = []
        for segment_keyframes in keyframes_per_segment:
            all_keyframes += [keyframe_idx for keyframe_idx, bbox in segment_keyframes]

        print("")
        print("processing: " + current_lecture.title)
        # print(all_keyframes)

        # the simple frame sampling worker ..
        worker = SimpleFrameSampler()

        # main video file names
        m_videos = [config.get_str("VIDEO_FILES_PATH") + "/" + video["path"]
                    for video in current_lecture.main_videos]

        # execute the actual process ....
        processor = SequentialVideoSampler(m_videos, all_keyframes)
        if "forced_width" in current_lecture.parameters:
            processor.force_resolution(current_lecture.parameters["forced_width"],
                                       current_lecture.parameters["forced_height"])
        processor.doProcessing(worker, 0, True)  # 0

        sampled_frame_data = worker.frame_times, worker.frame_indices, worker.compressed_frames

        # save the results
        keyframes_data_filename = keyframes_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(sampled_frame_data, keyframes_data_filename)
def main():
    if len(sys.argv) < 2:
        print("Usage:")
        print("\tpython train_ml_binarizer.py config [force_update] [classifier_file] [patch_size]")
        print("")
        print("Where")
        print("\tconfig\t\t\tPath to config file")
        print("\tforce_update \t\tOptional, force an update of the sampled patch file")
        print("\tclassifier_file \tOptional, classifier path different from the one in the config")
        print("\tpatch_size \t\tOptional, override the patch size")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    # load the database
    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid database file")
        return

    # <Parameters>
    run_crossvalidation = config.get("ML_BINARIZER_TRAIN_RUN_CROSSVALIDATION", True)

    if not config.contains("ML_BINARIZER_PATCHES_FILENAME"):
        print("Must specify a file to store the sampled patches")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    ml_binarizer_dir = output_dir + "/" + config.get_str("ML_BINARIZER_DIR")
    patch_filename = ml_binarizer_dir + "/" + config.get_str("ML_BINARIZER_PATCHES_FILENAME")

    # for debugging/comparison, use OTSU binarization
    OTSU_mode = config.get("ML_BINARIZER_TRAIN_OTSU_MODE", False)
    # baseline_mode = True  # train a Random Forest instead .

    retrain_classifier = config.get("ML_BINARIZER_TRAIN_RETRAIN", True)

    if not config.get("ML_BINARIZER_OVERRIDE_PARAMETERS", False):
        # Sampling mode #1: Distribution of proportions
        #    Out of 100% of the pixels, we sample fg_proportion from the GT Foreground pixels (handwriting pixels)
        #       handwriting pixels = fg_proportion
        #       all background     = (1 - fg_proportion)
        #
        #    The remaining background pixels are sampled as either close to or far from the foreground
        #       Close to Foreground pixels  = (1 - fg_proportion) * bg_close_prop
        #       Remaining background pixels = (1 - fg_proportion) * (1 - bg_close_prop)
        #
        #    The last proportion of pixels can come from the whiteboard or from background objects, so we separate:
        #       Not Close Whiteboard background pixels = (1 - fg_proportion) * (1 - bg_close_prop) * bg_board_prop
        #       Not Whiteboard background pixels       = (1 - fg_proportion) * (1 - bg_close_prop) * (1 - bg_board_prop)
        #
        # Sampling mode #2: Distribution of proportions
        #    Out of 100% of the pixels, we sample fg_proportion from the GT Foreground pixels (handwriting pixels)
        #       handwriting pixels = fg_proportion
        #       all background     = (1 - fg_proportion)
        #
        #    The remaining background pixels are sampled by the average intensity of the window: the greater the
        #    average, the more likely they are to be sampled. This is consistent with sampling mode 1, but it is
        #    less discrete and requires fewer parameters.
        sampling_mode = Parameters.MLBin_sampling_mode
        patch_size = Parameters.MLBin_patch_size
        patches_per_frame = Parameters.MLBin_sampling_patches_per_frame
        fg_proportion = Parameters.MLBin_sampling_fg_proportion
        bg_close_prop = Parameters.MLBin_sampling_bg_close_prop
        bg_board_prop = Parameters.MLBin_sampling_bg_board_prop

        mlbin_sigma_color = Parameters.MLBin_sigma_color
        mlbin_sigma_space = Parameters.MLBin_sigma_space
        mlbin_median_blur_k = Parameters.MLBin_median_blur_k
        mlbin_dark_background = Parameters.MLBin_dark_background

        feature_workers = Parameters.MLBin_train_workers

        # Random Forest
        rf_n_trees = Parameters.MLBin_rf_n_trees            # 16
        rf_max_depth = Parameters.MLBin_rf_max_depth        # 12
        rf_max_features = Parameters.MLBin_rf_max_features  # 32
    else:
        print("Reading ML Binarizer parameters from config ...")
        sampling_mode = config.get_int("ML_BINARIZER_SAMPLING_MODE", 2)
        patch_size = config.get_int("ML_BINARIZER_PATCH_SIZE", 7)
        patches_per_frame = config.get_int("ML_BINARIZER_SAMPLING_PATCHES_PER_FRAME", 20000)
        fg_proportion = config.get_float("ML_BINARIZER_SAMPLING_FG_PROPORTION", 0.5)
        bg_close_prop = config.get_float("ML_BINARIZER_SAMPLING_BG_CLOSE_PROPORTION", 0.9)
        bg_board_prop = config.get_float("ML_BINARIZER_SAMPLING_BG_BOARD_PROPORTION", 1.0)

        mlbin_sigma_color = config.get_float("ML_BINARIZER_SIGMA_COLOR", 13.5)
        mlbin_sigma_space = config.get_float("ML_BINARIZER_SIGMA_SPACE", 4.0)
        mlbin_median_blur_k = config.get_int("ML_BINARIZER_MEDIAN_BLUR_K", 33)
        mlbin_dark_background = config.get("ML_BINARIZER_DARK_BACKGROUND")

        feature_workers = config.get_int("ML_BINARIZER_TRAIN_WORKERS", 7)

        # Random Forest
        rf_n_trees = config.get_int("ML_BINARIZER_RF_N_TREES", 16)            # 16
        rf_max_depth = config.get_int("ML_BINARIZER_RF_MAX_DEPTH", 12)        # 12
        rf_max_features = config.get_int("ML_BINARIZER_RF_MAX_FEATURES", 32)  # 32

    if len(sys.argv) >= 4:
        # user-specified location
        classifier_file = sys.argv[3]
    else:
        # by default, store at the place specified in the configuration or parameters file ...
        if not config.get("ML_BINARIZER_OVERRIDE_PARAMETERS", False):
            classifier_file = Parameters.MLBin_classifier_file
        else:
            classifier_file = ml_binarizer_dir + "/" + config.get_str("ML_BINARIZER_CLASSIFIER_FILENAME")

    feature_function = get_patch_features_raw_values
    # </Parameters>

    if len(sys.argv) >= 3:
        try:
            force_update = int(sys.argv[2]) > 0
        except ValueError:
            print("Invalid value for force_update")
            return
    else:
        force_update = False

    if len(sys.argv) >= 5:
        try:
            patch_size = int(sys.argv[4])
        except ValueError:
            print("Invalid value for patch_size")
            return

    assert (patch_size - 1) % 2 == 0
    bg_close_neighborhood = int((patch_size - 1) / 2) + 1

    print("Classifier Path: " + classifier_file)

    ml_binarizer = MLBinarizer(None, patch_size, mlbin_sigma_color, mlbin_sigma_space,
                               mlbin_median_blur_k, mlbin_dark_background)

    print("... loading data ...")
    start_loading = time.time()

    all_keyframes, binarized_keyframes = load_keyframes(output_dir, database)
    fake_unique_groups, fake_cc_group, fake_segments = generate_fake_keyframe_info(all_keyframes)
    print("Total Training keyframes: " + str(len(all_keyframes)))

    end_loading = time.time()

    start_preprocessing = time.time()
    print("Pre-processing key-frames", flush=True)
    all_preprocessed = []
    for kf_idx, kf in enumerate(all_keyframes):
        all_preprocessed.append(ml_binarizer.preprocessing(kf.raw_image))
        # cv2.imwrite("DELETE_NOW_tempo_bin_input_" + str(kf_idx) + ".png", all_preprocessed[-1])
    end_preprocessing = time.time()

    start_patch_extraction = time.time()
    # extracting/loading the patches used for training (only if not in OTSU mode)
    if not OTSU_mode:
        # generate the patch-based training set ...
        # check if the patch file exists ...
        if not os.path.exists(patch_filename) or force_update:
            print("Extracting patches...")

            if sampling_mode == 1:
                # SampleEdgeFixBg()
                patches = PatchSampling.SampleEdgeFixBg(all_keyframes, all_preprocessed, patch_size,
                                                        patches_per_frame, fg_proportion, bg_close_prop,
                                                        bg_board_prop, bg_close_neighborhood)
            elif sampling_mode == 2:
                # SampleEdgeContBg
                patches = PatchSampling.SampleEdgeContBg(all_keyframes, all_preprocessed, patch_size,
                                                         patches_per_frame, fg_proportion)
            else:
                patches = (None, None)

            patches_images, patches_labels = patches

            # generate the features
            print("\nGenerating features ...", flush=True)
            all_features = []
            with ProcessPoolExecutor(max_workers=feature_workers) as executor:
                for lect_idx, lecture_images in enumerate(patches_images):
                    print("Processing patches from lecture {0:d} out of {1:d}".format(lect_idx + 1,
                                                                                      len(patches_images)))
                    lecture_features = []
                    for i, patch_features in enumerate(executor.map(feature_function, lecture_images)):
                        lecture_features.append(patch_features)
                    all_features.append(lecture_features)

            print("\nSaving patches and features to file")
            out_file = open(patch_filename, "wb")
            pickle.dump(patches_labels, out_file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(patches_images, out_file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(all_features, out_file, pickle.HIGHEST_PROTOCOL)
            out_file.close()
        else:
            # load the patches from file ....
            print("Loading patches and features from file")
            in_file = open(patch_filename, "rb")
            patches_labels = pickle.load(in_file)
            patches_images = pickle.load(in_file)
            all_features = pickle.load(in_file)
            in_file.close()

    end_patch_extraction = time.time()

    total_training_time = 0.0
    total_binarization_time = 0.0
    total_evaluation_time = 0.0

    cross_validated_classifiers = []
    if not OTSU_mode:
        start_training = time.time()

        # train the classifier using the training patches ...
        count_all_patches = sum([len(lecture_images) for lecture_images in patches_images])
        print("Total patches available for training: " + str(count_all_patches))
        n_features = len(all_features[0][0])
        print("Total Features: " + str(n_features))

        # check local performance using cross-validation based on leaving one lecture out
        conf_matrix = np.zeros((2, 2), dtype=np.int32)
        avg_train_accuracy = 0.0
        rf_max_features = min(rf_max_features, n_features)

        if run_crossvalidation:
            for i in range(len(patches_images)):
                print("Cross-validation fold #" + str(i + 1))

                training_data = []
                training_labels = []
                testing_data = []
                testing_labels = []
                for k in range(len(patches_images)):
                    if i == k:
                        testing_data += all_features[k]
                        testing_labels += patches_labels[k]
                    else:
                        training_data += all_features[k]
                        training_labels += patches_labels[k]

                training_data = np.array(training_data)
                testing_data = np.array(testing_data)

                print("-> Training Samples: " + str(training_data.shape[0]))
                print("-> Testing Samples: " + str(testing_data.shape[0]))

                # classification mode ...
                # random forest ...
                classifier = RandomForestClassifier(rf_n_trees, max_features=rf_max_features,
                                                    max_depth=rf_max_depth, n_jobs=-1)
                classifier.fit(training_data, training_labels)
                # keep a reference to the n-th fold classifier
                cross_validated_classifiers.append(classifier)

                pred_labels = classifier.predict(training_data)
                train_conf_matrix = np.zeros((2, 2), dtype=np.int32)
                for train_idx in range(len(training_labels)):
                    train_conf_matrix[training_labels[train_idx], pred_labels[train_idx]] += 1

                pixel_accuracy = (train_conf_matrix[0, 0] + train_conf_matrix[1, 1]) / len(training_labels)
                print("-> Train pixel accuracy: " + str(pixel_accuracy * 100.0))
                avg_train_accuracy += pixel_accuracy

                pred_labels = classifier.predict(testing_data)
                for test_idx in range(len(testing_labels)):
                    conf_matrix[testing_labels[test_idx], pred_labels[test_idx]] += 1

            pixel_accuracy = (conf_matrix[0, 0] + conf_matrix[1, 1]) / count_all_patches
            avg_train_accuracy /= len(all_features)

            print("Combined testing confusion matrix: ")
            print(conf_matrix)
            print("Final training pixel accuracy: " + str(avg_train_accuracy * 100.0))
            print("Final testing pixel accuracy: " + str(pixel_accuracy * 100.0))

        # now, use all the data to train the classifier used for binarization of all frames ...
        if not os.path.exists(classifier_file) or force_update or retrain_classifier:
            print("Training classifier using all patches", flush=True)

            # classification
            training_data = []
            training_labels = []
            for k in range(len(patches_images)):
                training_data += all_features[k]
                training_labels += patches_labels[k]

            training_data = np.array(training_data)

            # train the Random Forest
            classifier = RandomForestClassifier(rf_n_trees, max_features=rf_max_features,
                                                max_depth=rf_max_depth, n_jobs=-1)
            classifier.fit(training_data, training_labels)

            print("Saving classifier to file")
            out_file = open(classifier_file, "wb")
            pickle.dump(classifier, out_file, pickle.HIGHEST_PROTOCOL)
            out_file.close()
        else:
            print("Loading classifier from file")
            in_file = open(classifier_file, "rb")
            classifier = pickle.load(in_file)
            in_file.close()

        # release (a lot of) memory held by elements that will not be used after this point ...
        all_features = None
        patches_labels = None
        training_data = None
        training_labels = None
        testing_data = None
        testing_labels = None

        end_training = time.time()
        total_training_time += end_training - start_training

    # binarize using the chosen parameter combination...
    start_binarizing = time.time()

    last_lecture = None
    lecture_offset = -1
    training_set = database.get_dataset("training")
    for idx, bin_kf in enumerate(binarized_keyframes):
        if bin_kf.lecture != last_lecture:
            last_lecture = bin_kf.lecture
            lecture_offset += 1

        print("binarizing kf #" + str(idx) + ", from " + training_set[lecture_offset].title, end="\r", flush=True)

        if OTSU_mode:
            # ideal BG removal ...
            # strel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(patch_size), int(patch_size)))
            # bg_mask = all_keyframes[idx].object_mask > 0
            # all_preprocessed[idx][bg_mask] = 0

            otsu_t, bin_res = cv2.threshold(all_preprocessed[idx].astype(np.uint8), 0, 255,
                                            cv2.THRESH_BINARY + cv2.THRESH_OTSU)

            bin_kf.binary_image = np.zeros((bin_res.shape[0], bin_res.shape[1], 3), dtype=np.uint8)
            bin_kf.binary_image[:, :, 0] = 255 - bin_res.copy()
            bin_kf.binary_image[:, :, 1] = bin_kf.binary_image[:, :, 0].copy()
            bin_kf.binary_image[:, :, 2] = bin_kf.binary_image[:, :, 0].copy()
        else:
            # set the classifier used for binarization ....
            if run_crossvalidation:
                # use the classifier that has not seen this image ...
                ml_binarizer.classifier = cross_validated_classifiers[lecture_offset]
            else:
                # use the globally trained classifier
                ml_binarizer.classifier = classifier

            # ... binarize the pre-processed image ...
            binary_image = ml_binarizer.preprocessed_binarize(all_preprocessed[idx])

            # do hysteresis filtering ...
            otsu_t, high_bin = cv2.threshold(all_preprocessed[idx].astype(np.uint8), 0, 255,
                                             cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            low_bin = binary_image
            filtered_bin = 255 - MLBinarizer.binary_hysteresis(low_bin, high_bin)

            bin_kf.binary_image = np.zeros((filtered_bin.shape[0], filtered_bin.shape[1], 3), dtype=np.uint8)
            bin_kf.binary_image[:, :, 0] = filtered_bin
            bin_kf.binary_image[:, :, 1] = filtered_bin
            bin_kf.binary_image[:, :, 2] = filtered_bin

        bin_kf.update_binary_cc(False)

        if config.get("ML_BINARIZER_SAVE_BINARY", True):
            if OTSU_mode:
                out_name = "TEMPO_OTSU_baseline_binarized_" + str(idx) + ".png"
            else:
                out_name = "TEMPO_rf_baseline_binarized_" + str(idx) + ".png"
            cv2.imwrite(out_name, bin_kf.binary_image)

    end_binarizing = time.time()
    total_binarization_time += end_binarizing - start_binarizing

    # run the evaluation metrics ...
    print("Computing final evaluation metrics....")

    # summary-level metrics ....
    start_evaluation = time.time()
    EvalParameters.UniqueCC_global_tran_window = 1
    EvalParameters.UniqueCC_min_precision = [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.90, 0.95]
    EvalParameters.UniqueCC_min_recall = [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.90, 0.95]
    EvalParameters.Report_Summary_Show_Counts = False
    EvalParameters.Report_Summary_Show_AVG_per_frame = False
    EvalParameters.Report_Summary_Show_Globals = True

    all_scope_metrics, scopes = Evaluator.compute_summary_metrics(fake_segments, all_keyframes,
                                                                  fake_unique_groups, fake_cc_group,
                                                                  fake_segments, binarized_keyframes, False)

    for scope in scopes:
        print("")
        print("Metrics for scope: " + scope)
        print(" \t \tRecall\t \t \tPrecision")
        print("Min R.\tMin P.\tE + P\tE. Only\tP. Only\tE + P\tE. Only\tP. Only\tBG. %\tNo BG P.")

        scope_metrics = all_scope_metrics[scope]

        recall_percent_row = "{0:.2f}\t{1:.2f}\t{2:.2f}\t{3:.2f}\t{4:.2f}"
        prec_percent_row = "{0:.2f}\t{1:.2f}\t{2:.2f}\t{3:.2f}\t{4:.2f}"
        for all_metrics in scope_metrics:
            metrics = all_metrics["recall_metrics"]
            recall_str = recall_percent_row.format(all_metrics["min_cc_recall"] * 100.0,
                                                   all_metrics["min_cc_precision"] * 100.0,
                                                   metrics["recall"] * 100.0,
                                                   metrics["only_exact_recall"] * 100.0,
                                                   metrics["only_partial_recall"] * 100.0)

            metrics = all_metrics["precision_metrics"]
            prec_str = prec_percent_row.format(metrics["precision"] * 100.0,
                                               metrics["only_exact_precision"] * 100.0,
                                               metrics["only_partial_precision"] * 100.0,
                                               metrics["global_bg_unmatched"] * 100.0,
                                               metrics["no_bg_precision"] * 100.0)

            print(recall_str + "\t" + prec_str)

    # pixel-level metrics
    pixel_metrics = Evaluator.compute_pixel_binary_metrics(all_keyframes, binarized_keyframes)
    print("Pixel level metrics")
    for key in sorted(pixel_metrics.keys()):
        print("{0:s}\t{1:.2f}".format(key, pixel_metrics[key] * 100.0))

    end_evaluation = time.time()
    total_evaluation_time += end_evaluation - start_evaluation

    end_everything = time.time()

    print("Total loading time: " + TimeHelper.secondsToStr(end_loading - start_loading))
    print("Total preprocessing time: " + TimeHelper.secondsToStr(end_preprocessing - start_preprocessing))
    print("Total patch extraction time: " + TimeHelper.secondsToStr(end_patch_extraction - start_patch_extraction))
    print("Total training time: " + TimeHelper.secondsToStr(total_training_time))
    print("Total binarization time: " + TimeHelper.secondsToStr(total_binarization_time))
    print("Total evaluation time: " + TimeHelper.secondsToStr(total_evaluation_time))
    print("Total Time: " + TimeHelper.secondsToStr(end_everything - start_loading))
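# A worked example (using the config defaults shown above) of the sampling
# mode #1 proportions described in the comments: with patches_per_frame = 20000,
# fg_proportion = 0.5, bg_close_prop = 0.9 and bg_board_prop = 1.0, the
# per-frame patch budget splits as follows.
patches_per_frame = 20000
fg_proportion, bg_close_prop, bg_board_prop = 0.5, 0.9, 1.0

handwriting = round(patches_per_frame * fg_proportion)                      # 10000
bg_close = round(patches_per_frame * (1 - fg_proportion) * bg_close_prop)   # 9000
bg_far = patches_per_frame * (1 - fg_proportion) * (1 - bg_close_prop)
bg_far_board = round(bg_far * bg_board_prop)                                # 1000 (whiteboard)
bg_far_other = round(bg_far * (1 - bg_board_prop))                          # 0 (other objects)
assert handwriting + bg_close + bg_far_board + bg_far_other == patches_per_frame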
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    # input / output paths
    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get("SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get("SPEAKER_ACTION_KEYFRAMES_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str("SPEAKER_FG_ESTIMATION_MASK_DIR")

    summaries_dir = output_dir + "/" + database.output_summaries
    os.makedirs(summaries_dir, exist_ok=True)

    summary_prefix = summaries_dir + "/" + config.get_str("SPEAKER_SUMMARY_PREFIX") + "_" + database.name + "_"

    # current dataset ....
    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    print("... preparing summary generator ...")
    summ_generator = SummaryGenerator(config)

    for current_lecture in testing_set:
        print("")
        print("Processing: " + current_lecture.title)

        # get all inputs ....
        # read the segment data ....
        segments_data_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_segment_data = MiscHelper.dump_load(segments_data_filename)

        # read the key-frame data ...
        keyframes_data_filename = keyframes_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_keyframes_data = MiscHelper.dump_load(keyframes_data_filename)

        # read the mask data ...
        fg_mask_filename = fg_mask_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        fg_mask_png = MiscHelper.dump_load(fg_mask_filename)
        fg_mask = cv2.imdecode(fg_mask_png, cv2.IMREAD_GRAYSCALE)

        output_prefix = summary_prefix + current_lecture.title.lower()

        summ_generator.export_summary(database, current_lecture, video_segment_data,
                                      video_keyframes_data, fg_mask, output_prefix)
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    unique_label = config.get("SPEAKER_VALID_ACTIONS")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    normalization_bone = config.get("SPEAKER_NORMALIZATION_BONE")  # pair of normalization factor points

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")
    action_object_name = config.get_str("SPEAKER_ACTION_MAIN_OBJECT", "speaker")
    action_segment_output_dir = config.get_str("SPEAKER_ACTION_SEGMENT_OUTPUT_DIR", ".")
    segments_output_prefix = output_dir + "/" + action_segment_output_dir + "/" + database.name + "_"

    # the per-lecture OpenPose CSV
    lecture_filename_prefix = output_dir + "/" + config.get_str("OPENPOSE_OUTPUT_DIR_CSV") + "/" + database.name + "_"

    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    os.makedirs(output_segment_dir, exist_ok=True)

    # first .... cache all OpenPose CSV data per training lecture ....
    data_per_lecture = {}
    for lecture in training_set:
        lecture_filename = lecture_filename_prefix + lecture.title + ".csv"
        print("Loading data for: " + lecture_filename)

        segments, data = LecturePoseSegments.InitializeFromLectureFile(lecture_filename, normalization_bone,
                                                                       remove_confidence)
        data_per_lecture[lecture.title.lower()] = {
            "segments": segments,
            "data": data
        }

    # read the training frame segments info file
    segment_filename = segments_output_prefix + dataset_name + "_" + action_object_name + ".csv"
    speaker_seg_train = pd.read_csv(segment_filename)  # frame segment info for the speaker object training data
    speaker_seg_train = speaker_seg_train.values

    # split the OpenPose data based on the given segments ...
    for vid_name, f_start, f_end, label in speaker_seg_train:
        vid_name = vid_name.lower()
        # print((vid_name, f_start, f_end, label))

        # if the label is not one of the main 8 labels, omit it
        if label not in unique_label:
            continue

        if vid_name not in data_per_lecture:
            print("Invalid lecture name found: " + vid_name)
            continue

        temp_data = data_per_lecture[vid_name]["data"][f_start:f_end + 1, :]
        temp_pose_segment_data = PoseSegmentData(f_start, f_end, label, temp_data)
        data_per_lecture[vid_name]["segments"].segments.append(temp_pose_segment_data)

    # save to file ...
    for lecture in training_set:
        output_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        MiscHelper.dump_save(data_per_lecture[lecture.title.lower()]["segments"], output_filename)

    print("Data Segment Saving Done.")
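# A minimal sketch (with made-up values) of how each CSV row above maps to a
# slice of the per-lecture OpenPose data: rows are (lecture, f_start, f_end,
# label) and the slice is inclusive on both ends, hence the f_end + 1.
import numpy as np

lecture_data = np.arange(200 * 4).reshape(200, 4)   # 200 frames, 4 pose values each
f_start, f_end, label = 30, 44, "writing"
segment = lecture_data[f_start:f_end + 1, :]        # 15 frames (inclusive range)
assert segment.shape[0] == (f_end - f_start) + 1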
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    video_metadata_dir = output_dir + "/" + config.get_str("SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    output_bboxes_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    temporal_segments_dir = output_dir + "/" + config.get("SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    os.makedirs(temporal_segments_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    for current_lecture in testing_set:
        info_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + current_lecture.title + ".csv"

        video_info = MiscHelper.dump_load(info_filename)

        segmenter = VideoSegmenter.FromConfig(config, video_info["width"], video_info["height"])

        # read the label data ....
        prob_info = ResultReader.read_actions_probabilities_file(proba_filename, valid_actions)
        segments, gt_actions, pred_actions, prob_actions = prob_info

        # read the bbox data ...
        bbox_filename = output_bboxes_dir + "/" + database.name + "_" + current_lecture.title + ".csv"
        frame_idxs, frame_actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(bbox_filename,
                                                                                        use_ground_truth)

        # (splits_frames, video_keyframes)
        video_data = segmenter.get_keyframes(pred_actions, segments, frame_idxs, body_bboxes, rh_bboxes)

        print("")
        print("video key_frames")
        print(video_data[0])
        print(video_data[1])
        print("")

        output_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(video_data, output_filename)
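# A hedged sketch (made-up values) of the structure assumed for the pickle
# saved above, based on how the keyframe-extraction script unpacks it: a tuple
# of (temporal split frames, per-segment keyframes), where each keyframe entry
# pairs a frame index with a bounding box. The exact bbox format is internal to
# VideoSegmenter and not assumed here.
example_video_data = (
    [0, 4500, 9000],                                   # temporal split frame indices
    [
        [(1200, (10, 50, 300, 400))],                  # segment 1: one (keyframe_idx, bbox)
        [(5100, (15, 60, 310, 420)), (8800, None)],    # segment 2: two keyframes
    ],
)
splits_frames, video_keyframes = example_video_data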