def start_image_list_preprocessing(self, src_dir, get_worker_function, get_results_function,
                                   img_extension='.png', frames_limit=0, verbose=False):
    for lecture in self.database.lectures:
        self.current_lecture = lecture
        _, out_file, skip = self.get_lecture_params(lecture)

        if skip:
            continue

        # create a worker ...
        worker = get_worker_function(self)

        # execute the actual process ....
        processor = ImageListProcessor('{}{}'.format(src_dir, self.current_lecture.title),
                                       img_extension=img_extension)
        if verbose:
            print('Opening exported image folder {}{}'.format(src_dir, self.current_lecture.title))

        if "forced_width" in lecture.parameters:
            processor.force_resolution(lecture.parameters["forced_width"],
                                       lecture.parameters["forced_height"])

        processor.doProcessing(worker, frames_limit, verbose)

        # save results
        if self.output_temp_prefix is not None:
            results = get_results_function(worker)
            MiscHelper.dump_save(results, self.temp_dir + '/' + self.output_temp_prefix + out_file)
def start_video_processing(self, frames_per_second, get_worker_function, get_results_function,
                           frames_limit=0, verbose=False, force_no_seek=False):
    for lecture in self.database.lectures:
        self.current_lecture = lecture
        m_videos, out_file, skip = self.get_lecture_params(lecture)

        if skip:
            continue

        # create a worker ...
        worker = get_worker_function(self)

        # execute the actual process ....
        processor = VideoProcessor(m_videos, frames_per_second)
        if "forced_width" in lecture.parameters:
            processor.force_resolution(lecture.parameters["forced_width"],
                                       lecture.parameters["forced_height"])

        processor.doProcessing(worker, frames_limit, verbose, force_no_seek)

        # save results
        if self.output_temp_prefix is not None:
            results = get_results_function(worker)
            MiscHelper.dump_save(results, self.temp_dir + '/' + self.output_temp_prefix + out_file)
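# Usage sketch (illustrative only, not part of the original pipeline code): both
# drivers above expect two callables. get_worker_function(pipeline) must build a
# fresh worker per lecture, and get_results_function(worker) must extract whatever
# the worker accumulated during doProcessing(...). The worker here reuses the
# project's SimpleFrameSampler (its result attributes appear in the sampling
# scripts below); whether it fits this particular processor's worker interface,
# and the `pipeline` instance itself, are assumptions.

def demo_get_worker(pipeline):
    # one fresh worker per lecture, so results never leak between lectures
    return SimpleFrameSampler()

def demo_get_results(worker):
    # collect what the sampler accumulated while the processor drove it
    return worker.frame_times, worker.frame_indices, worker.compressed_frames

# pipeline.start_video_processing(1.0, demo_get_worker, demo_get_results, verbose=True)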
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    video_metadata_dir = output_dir + "/" + config.get_str("SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    os.makedirs(video_metadata_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    for current_lecture in testing_set:
        print("")
        print("processing: " + current_lecture.title)

        # the simple frame sampling worker ..
        worker = SimpleFrameSampler()

        # main video file names
        m_videos = [config.get_str("VIDEO_FILES_PATH") + "/" + video["path"]
                    for video in current_lecture.main_videos]

        video_info = {}
        if "forced_width" in current_lecture.parameters:
            video_info["width"] = current_lecture.parameters["forced_width"]
            video_info["height"] = current_lecture.parameters["forced_height"]
        else:
            # sample the first frame to read the real resolution ....
            processor = SequentialVideoSampler(m_videos, [0])
            processor.doProcessing(worker, 0, True)

            video_info["width"] = worker.width
            video_info["height"] = worker.height

        output_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(video_info, output_filename)
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    normalization_bone = config.get("SPEAKER_NORMALIZATION_BONE")  # pair of norm factor points

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")
    # the per lecture openpose CSV
    lecture_filename_prefix = output_dir + "/" + config.get_str("OPENPOSE_OUTPUT_DIR_CSV") + "/" + database.name + "_"
    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    os.makedirs(output_segment_dir, exist_ok=True)

    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH")

    for lecture in testing_set:
        lecture_filename = lecture_filename_prefix + lecture.title + ".csv"
        print("Loading data for: " + lecture_filename)

        # get the corresponding data for this lecture ...
        lec_segments, lecture_data = LecturePoseSegments.InitializeFromLectureFile(
            lecture_filename, normalization_bone, remove_confidence)

        # sequential sampling of fixed-length pose segments
        vid_len = lecture_data.shape[0]
        for ind in range(0, int(vid_len / segment_length)):
            f_start = ind * segment_length
            f_end = f_start + segment_length - 1
            temp_data = lecture_data[f_start:f_end + 1, :]
            lec_segments.segments.append(PoseSegmentData(f_start, f_end, None, temp_data))

        # save ....
        output_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        MiscHelper.dump_save(lec_segments, output_filename)
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])
    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    classifier_dir = output_dir + "/" + config.get_str("SPEAKER_ACTION_CLASSIFIER_DIR")
    os.makedirs(classifier_dir, exist_ok=True)
    classifier_filename = classifier_dir + "/" + config.get_str("SPEAKER_ACTION_CLASSIFIER_FILENAME")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]
    training_titles = [lecture.title.lower() for lecture in training_set]

    # get classifier parameters
    rf_n_trees = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_TREES", 64)
    rf_depth = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_DEPTH", 16)

    # read all training data available ....
    train_dataset = {}
    for lecture in training_set:
        input_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        train_dataset[lecture.title.lower()] = MiscHelper.dump_load(input_filename)

    train_x, train_y, train_frame_infos = PoseFeatureExtractor.combine_datasets(training_titles, train_dataset)

    # train the random forest classifier and save it ...
    clf = RandomForestClassifier(n_estimators=rf_n_trees, max_depth=rf_depth, random_state=0)
    clf = clf.fit(train_x, train_y)
    MiscHelper.dump_save(clf, classifier_filename)
def getCrossings(self, cc, horizontal):
    if horizontal:
        # horizontal scanlines: y (row) is fixed per crossing
        step = cc.normalized.shape[0] / float(self.count_crossings + 1)
    else:
        # vertical scanlines: x (column) is fixed per crossing
        step = cc.normalized.shape[1] / float(self.count_crossings + 1)

    counts = []
    mins = []
    maxs = []
    for i in range(self.count_crossings):
        pos = int((i + 1) * step)

        # the crossing is treated in terms of boolean intervals...
        booleans = []
        if horizontal:
            # horizontal -> y fixed and x moves
            for x in range(cc.normalized.shape[1]):
                booleans.append(cc.normalized[pos, x] > 128.0)
        else:
            # vertical -> x fixed and y moves
            for y in range(cc.normalized.shape[0]):
                booleans.append(cc.normalized[y, pos] > 128.0)

        # find the intervals...
        intervals = MiscHelper.findBooleanIntervals(booleans, True)

        # now, get the middle points for each interval...
        midPoints = MiscHelper.intervalMidPoints(intervals)

        # normalize values over the length of the scanned axis
        # (x for horizontal crossings, y for vertical ones; these coincide
        #  when the normalized image is square)
        scanned_length = cc.normalized.shape[1] if horizontal else cc.normalized.shape[0]
        midPoints = MiscHelper.scaleValues(midPoints, 0, scanned_length - 1, -1, 1)

        counts.append(len(intervals))
        if len(intervals) > 0:
            mins.append(midPoints[0])
            maxs.append(midPoints[-1])
        else:
            mins.append(1.1)
            maxs.append(-1.1)

    counts = MiscHelper.scaleValues(counts, 0, 10, -3, 3)

    return counts + mins + maxs
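# Illustrative sketch (assumptions, not the project's MiscHelper): the three
# helpers used by getCrossings are small enough to restate here so the feature
# layout is clear. findBooleanIntervals is assumed to return (start, end) index
# pairs of maximal runs equal to `value`, intervalMidPoints the center of each
# run, and scaleValues a linear map from [old_min, old_max] to [new_min, new_max].

def find_boolean_intervals(booleans, value):
    intervals = []
    start = None
    for idx, b in enumerate(booleans):
        if b == value and start is None:
            start = idx
        elif b != value and start is not None:
            intervals.append((start, idx - 1))
            start = None
    if start is not None:
        intervals.append((start, len(booleans) - 1))
    return intervals

def interval_mid_points(intervals):
    return [(ini + end) / 2.0 for ini, end in intervals]

def scale_values(values, old_min, old_max, new_min, new_max):
    scale = (new_max - new_min) / float(old_max - old_min)
    return [new_min + (v - old_min) * scale for v in values]

# toy scanline: two ink runs -> 2 crossings with midpoints at 2.0 and 7.5
runs = [False, True, True, True, False, False, False, True, True, False]
print(find_boolean_intervals(runs, True))                 # [(1, 3), (7, 8)]
print(interval_mid_points([(1, 3), (7, 8)]))              # [2.0, 7.5]
print(scale_values([2.0, 7.5], 0, 9, -1, 1))              # approx [-0.556, 0.667]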
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])
    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]

    # prepare the feature extractor ...
    feature_points = config.get("SPEAKER_ACTION_FEATURE_POINTS")
    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH", 15)
    feat_extractor = PoseFeatureExtractor(feature_points, segment_length)

    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    os.makedirs(features_dir, exist_ok=True)

    # for each file ... get features ...
    for lecture in training_set:
        input_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        output_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"

        lecture_pose_segments = MiscHelper.dump_load(input_filename)
        vid_data = feat_extractor.get_feature_dataset(lecture_pose_segments)
        MiscHelper.dump_save(vid_data, output_filename)
def FromUniformSample(database, lecture, step, sample_name, binary_source):
    # load output from pipeline ...
    lecture_suffix = str(lecture.id) + ".dat"

    # load binary images
    tempo_binary_filename = database.output_temporal + "/" + binary_source + lecture_suffix
    binary_data = MiscHelper.dump_load(tempo_binary_filename)
    original_frame_times, frame_indices, frame_compressed = binary_data

    # take a sample
    frame_times = [time for time in original_frame_times[::step]]
    frame_indices = [idx for idx in frame_indices[::step]]
    frame_compressed = [frame for frame in frame_compressed[::step]]

    print("Expanding loaded frames .... ")
    binary_frames = Helper.decompress_binary_images(frame_compressed)

    # segments ....
    output_prefix = database.output_summaries + "/" + sample_name + "_" + database.name + "_" + lecture.title.lower()
    print("Saving data to: " + output_prefix)

    # in abs frame indices ...
    intervals = []
    abs_intervals = []
    for idx, comp_frame in enumerate(binary_frames):
        if idx == 0:
            curr_start = int(frame_indices[idx] / 2)
            abs_start = frame_times[idx] / 2.0
        else:
            curr_start = int((frame_indices[idx - 1] + frame_indices[idx]) / 2)
            abs_start = (frame_times[idx - 1] + frame_times[idx]) / 2.0

        if idx + 1 < len(frame_indices):
            curr_end = int((frame_indices[idx + 1] + frame_indices[idx]) / 2)
            abs_end = (frame_times[idx + 1] + frame_times[idx]) / 2.0
        else:
            curr_end = frame_indices[idx]
            abs_end = frame_times[idx]

        # invert binarization ...
        binary_frames[idx] = 255 - comp_frame

        intervals.append((curr_start, curr_end))
        abs_intervals.append((abs_start, abs_end))

    KeyframeExporter.Export(output_prefix, database, lecture, intervals, abs_intervals,
                            frame_indices, frame_times, binary_frames)
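# Self-contained sketch of the midpoint rule used by FromUniformSample above:
# each sampled keyframe owns the span between the midpoints to its neighboring
# sampled frames, so the intervals tile the timeline up to the last sample. The
# same rule is applied to absolute times. Toy indices only.

def midpoint_intervals(frame_indices):
    intervals = []
    for idx in range(len(frame_indices)):
        if idx == 0:
            start = frame_indices[0] // 2
        else:
            start = (frame_indices[idx - 1] + frame_indices[idx]) // 2
        if idx + 1 < len(frame_indices):
            end = (frame_indices[idx] + frame_indices[idx + 1]) // 2
        else:
            end = frame_indices[idx]
        intervals.append((start, end))
    return intervals

print(midpoint_intervals([100, 300, 500]))  # [(50, 200), (200, 400), (400, 500)]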
def prepare(args):
    # load the database
    try:
        database = MetaDataDB.from_file(args['database'])
    except:
        print("Invalid AccessMath database file")
        return

    TangentV_Helper.VisualizerServer = database.indexing.visualization_server

    # ... Load 3D structures ....
    print("Loading CC indices per lecture ... ")
    for lecture in database.lectures:
        struct_filename = database.output_temporal + '/' + Parameters.Output_ST3D + str(lecture.id) + ".dat"
        TangentV_Helper.cache_3DSTs[lecture.title] = MiscHelper.dump_load(struct_filename)
def initialize(self):
    # load database info
    try:
        self.database = MetaDataDB.from_file(self.database_file)
    except:
        print("Invalid database file")
        return False

    self.params = MiscHelper.optional_parameters(self.raw_params, 0)

    # process the specified dataset(s)
    if "d" in self.params:
        if not isinstance(self.params["d"], list):
            self.params["d"] = [self.params["d"]]

        valid_datasets = []
        for name in self.params["d"]:
            dataset = self.database.get_dataset(name)
            if dataset is None:
                print("Invalid Dataset name <" + name + ">")
                return False
            else:
                valid_datasets.append(dataset)

        self.params["d"] = valid_datasets

    # process only the specified lectures
    if "l" in self.params:
        if not isinstance(self.params["l"], list):
            self.params["l"] = [self.params["l"]]

        self.params["l"] = [name.lower() for name in self.params["l"]]

    # override the input prefix
    if "i" in self.params:
        self.input_temp_prefix = self.params["i"]

    # override the output prefix
    if "o" in self.params:
        self.output_temp_prefix = self.params["o"]

    self.temp_dir = self.database.output_temporal
    self.out_dir = self.database.output_preprocessed
    self.img_dir = self.database.output_images

    # success loading database file ..
    return True
def selectMotionlessFrames(self):
    # maximum motion allowed
    threshold = 0

    booleans = []
    for m in self.motion_detected:
        booleans.append(m.count_changes <= threshold)

    intervals = MiscHelper.findBooleanIntervals(booleans, True)

    # only consider intervals of at least 3 frames
    candidates = []
    for ini, end in intervals:
        # check....
        if end - ini >= 2:
            # pick the frame in the middle of the interval
            middle = self.motion_detected[int((end + ini) / 2.0)]
            candidates.append((middle.video_index, middle.time))

    return candidates
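# Example of the selection rule above (illustrative values): with threshold 0,
# a motionless run covering indices (10, 16) satisfies end - ini >= 2, so the
# frame at index int((16 + 10) / 2.0) == 13 is selected; a run covering
# (20, 21) spans only two frames and is discarded.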
def getNonblockedIntervals(self, region_box, max_width, max_height, init_index, end_time):
    # Find a frame where the found region has no motion around it
    pos = init_index
    blocked_list = []
    while pos < len(self.motion_detected) and \
          self.motion_detected[pos].absolute_time < end_time:
        # check motion[pos] to see if main_region is blocked
        blocked = self.motion_detected[pos].isBlockingRegion(region_box, max_width, max_height, 3.0, 3.0)

        # add to boolean list
        blocked_list.append(blocked)
        pos += 1

    # now find the intervals where it is not obstructed...
    intervals = MiscHelper.findBooleanIntervals(blocked_list, False)

    return intervals
def start_input_processing(self, process_function):
    for lecture in self.database.lectures:
        self.current_lecture = lecture
        m_videos, lecture_file, skip = self.get_lecture_params(lecture)

        if skip:
            continue

        # read temporal file
        if self.input_temp_prefix is None:
            # null-input process (convenient way to process lectures)
            input_data = None
        else:
            if not isinstance(self.input_temp_prefix, list):
                input_data = MiscHelper.dump_load(self.temp_dir + '/' + self.input_temp_prefix + lecture_file)
            else:
                input_data = []
                for temp_prefix in self.input_temp_prefix:
                    input_data.append(MiscHelper.dump_load(self.temp_dir + '/' + temp_prefix + lecture_file))

        # execute the actual process ....
        timer = TimeHelper()
        timer.startTimer()
        results = process_function(self, input_data)
        timer.endTimer()
        print("Process Finished in: " + timer.totalElapsedStamp())

        # save results
        if self.output_temp_prefix is not None:
            if not isinstance(self.output_temp_prefix, list):
                MiscHelper.dump_save(results, self.temp_dir + '/' + self.output_temp_prefix + lecture_file)
            else:
                for out_idx, temp_prefix in enumerate(self.output_temp_prefix):
                    MiscHelper.dump_save(results[out_idx], self.temp_dir + '/' + temp_prefix + lecture_file)
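# Usage sketch (illustrative; assumes a pipeline object whose input_temp_prefix
# and output_temp_prefix were configured, e.g. via initialize()). The callback
# receives the pipeline itself plus the loaded temporal data and returns the
# results to be saved under the output prefix(es):

def demo_process(pipeline, input_data):
    # input_data is None, one loaded object, or a list of loaded objects,
    # matching whether input_temp_prefix is None, a string, or a list
    print("processing: " + pipeline.current_lecture.title)
    return input_data  # a real step would compute new results here

# pipeline.start_input_processing(demo_process)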
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("\n\tWhere:")
        print("\tgt_labels:\t(Optional) Set to 1 to use Ground Truth labels instead of predictions")
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])
    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    output_bboxes_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    os.makedirs(output_bboxes_dir, exist_ok=True)

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    speaker_right_handed = config.get("SPEAKER_IS_RIGHT_HANDED")

    n_joints_body = 25
    n_joints_hand = 21

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    col_name = ['frame_id', ('ground_truth' if use_ground_truth else 'pred_label'),
                'body_xmin', 'body_xmax', 'body_ymin', 'body_ymax',
                'rh_xmin', 'rh_xmax', 'rh_ymin', 'rh_ymax']

    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH")

    # load data + label
    for lecture in testing_set:
        input_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        lec_segments = MiscHelper.dump_load(input_filename)

        if use_ground_truth:
            labels = lec_segments.get_all_labels()
        else:
            input_proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + lecture.title + ".csv"
            _, _, labels, _ = ResultReader.read_actions_probabilities_file(input_proba_filename, valid_actions)

        output_filename = output_bboxes_dir + "/" + database.name + "_" + lecture.title + ".csv"
        output_file = ResultRecorder(output_filename)
        output_file.write_headers(col_name)

        # get bboxes for the skeleton and the writing hand from all segments
        frames = []
        segment_labels = []
        body_bbox = []
        rh_bbox = []
        for ind in range(0, len(lec_segments.segments)):
            # get the pose data ...
            if not remove_confidence:
                # the data contains confidence ... which needs to be removed at this point ...
                base_pose_data = lec_segments.segments[ind].pose_data
                total_joints = n_joints_body + n_joints_hand * 2
                seg_pose_data = np.zeros((base_pose_data.shape[0], total_joints * 2), dtype=base_pose_data.dtype)
                seg_pose_data[:, ::2] = base_pose_data[:, ::3]
                seg_pose_data[:, 1::2] = base_pose_data[:, 1::3]
            else:
                # confidence has been removed ....
                seg_pose_data = lec_segments.segments[ind].pose_data

            body_features = seg_pose_data[:, 0:n_joints_body * 2]
            if speaker_right_handed:
                # get right hand data
                rh_features = seg_pose_data[:, (n_joints_body + n_joints_hand) * 2:]
            else:
                # use left hand data
                rh_features = seg_pose_data[:, n_joints_body * 2:(n_joints_body + n_joints_hand) * 2]

            # get body bboxes and add to the list ....
            temp_body_bbox = PoseSegmentData.get_bbox_frame_data(body_features, 2)
            body_bbox += temp_body_bbox.tolist()

            # get hand bboxes and add to the list ....
            temp_rh_bbox = PoseSegmentData.get_bbox_frame_data(rh_features, 2)
            rh_bbox += temp_rh_bbox.tolist()

            # add frame range ....
            f_start = lec_segments.segments[ind].frame_start
            f_end = lec_segments.segments[ind].frame_end
            temp_frames = list(range(f_start, f_end + 1))
            frames += temp_frames

            # add one label per frame in the segment ....
            temp_label = [[labels[ind]] for _ in range(segment_length)]
            segment_labels += temp_label

        paras = frames, segment_labels, body_bbox, rh_bbox
        output_file.record_results(paras)
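# Self-contained sketch of the confidence-stripping slicing used above: rows of
# (x, y, confidence) triplets become rows of (x, y) pairs by copying every third
# column. Shapes here are toy values (3 joints), not the real joint counts.
import numpy as np

base = np.arange(2 * 9, dtype=np.float32).reshape(2, 9)  # 2 frames, 3 joints x (x, y, c)
pairs = np.zeros((base.shape[0], 3 * 2), dtype=base.dtype)
pairs[:, ::2] = base[:, ::3]     # x coordinates
pairs[:, 1::2] = base[:, 1::3]   # y coordinates
print(pairs[0])  # [0. 1. 3. 4. 6. 7.] -> confidence columns 2, 5, 8 dropped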
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # inputs / output paths
    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get("SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get("SPEAKER_ACTION_KEYFRAMES_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str("SPEAKER_FG_ESTIMATION_MASK_DIR")

    summaries_dir = output_dir + "/" + database.output_summaries
    os.makedirs(summaries_dir, exist_ok=True)
    summary_prefix = summaries_dir + "/" + config.get_str("SPEAKER_SUMMARY_PREFIX") + "_" + database.name + "_"

    # current dataset ....
    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    print("... preparing summary generator ...")
    summ_generator = SummaryGenerator(config)

    for current_lecture in testing_set:
        print("")
        print("Processing: " + current_lecture.title)

        # get all inputs ....
        # read segment data ....
        segments_data_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_segment_data = MiscHelper.dump_load(segments_data_filename)

        # read key-frames data ...
        keyframes_data_filename = keyframes_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_keyframes_data = MiscHelper.dump_load(keyframes_data_filename)

        # read mask data ...
        fg_mask_filename = fg_mask_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        fg_mask_png = MiscHelper.dump_load(fg_mask_filename)
        fg_mask = cv2.imdecode(fg_mask_png, cv2.IMREAD_GRAYSCALE)

        output_prefix = summary_prefix + current_lecture.title.lower()

        summ_generator.export_summary(database, current_lecture, video_segment_data,
                                      video_keyframes_data, fg_mask, output_prefix)
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    output_bboxes_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    video_metadata_dir = output_dir + "/" + config.get_str("SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str("SPEAKER_FG_ESTIMATION_MASK_DIR")
    os.makedirs(fg_mask_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    speaker_exp_factor = config.get_float("SPEAKER_FG_ESTIMATION_SPK_EXPANSION_FACTOR")
    min_mask_frames = config.get_int("SPEAKER_FG_ESTIMATION_MIN_MASK_FRAMES")
    mask_exp_radius = config.get_int("SPEAKER_FG_ESTIMATION_MASK_EXPANSION_RADIUS")

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    for current_lecture in testing_set:
        bbox_filename = output_bboxes_dir + "/" + database.name + "_" + current_lecture.title + ".csv"
        frame_idxs, actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(bbox_filename, use_ground_truth)

        info_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_info = MiscHelper.dump_load(info_filename)

        fg_estimator = ForegroundEstimator(video_info["width"], video_info["height"],
                                           speaker_exp_factor, min_mask_frames, mask_exp_radius)

        fg_mask = fg_estimator.get_mask(frame_idxs, actions, body_bboxes, rh_bboxes)

        # cv2.imshow(current_lecture.id, fg_mask)
        # cv2.waitKey()

        flag, raw_data = cv2.imencode(".png", fg_mask)

        output_filename = fg_mask_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(raw_data, output_filename)
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get("SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get("SPEAKER_ACTION_KEYFRAMES_DIR")
    os.makedirs(keyframes_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    for current_lecture in testing_set:
        # read segment data ....
        input_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_segment_data = MiscHelper.dump_load(input_filename)

        # key-frames that must be extracted from video ...
        segments, keyframes_per_segment = video_segment_data
        all_keyframes = []
        for segment_keyframes in keyframes_per_segment:
            all_keyframes += [keyframe_idx for keyframe_idx, bbox in segment_keyframes]

        print("")
        print("processing: " + current_lecture.title)
        # print(all_keyframes)

        # the simple frame sampling worker ..
        worker = SimpleFrameSampler()

        # main video file names
        m_videos = [config.get_str("VIDEO_FILES_PATH") + "/" + video["path"]
                    for video in current_lecture.main_videos]

        # execute the actual process ....
        processor = SequentialVideoSampler(m_videos, all_keyframes)

        if "forced_width" in current_lecture.parameters:
            processor.force_resolution(current_lecture.parameters["forced_width"],
                                       current_lecture.parameters["forced_height"])
        processor.doProcessing(worker, 0, True)

        sampled_frame_data = worker.frame_times, worker.frame_indices, worker.compressed_frames

        # save results
        keyframes_data_filename = keyframes_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(sampled_frame_data, keyframes_data_filename)
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])
    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    unique_label = config.get("SPEAKER_VALID_ACTIONS")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    normalization_bone = config.get("SPEAKER_NORMALIZATION_BONE")  # pair of norm factor points

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")
    action_object_name = config.get_str("SPEAKER_ACTION_MAIN_OBJECT", "speaker")
    action_segment_output_dir = config.get_str("SPEAKER_ACTION_SEGMENT_OUTPUT_DIR", ".")
    segments_output_prefix = output_dir + "/" + action_segment_output_dir + "/" + database.name + "_"

    # the per lecture openpose CSV
    lecture_filename_prefix = output_dir + "/" + config.get_str("OPENPOSE_OUTPUT_DIR_CSV") + "/" + database.name + "_"

    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    os.makedirs(output_segment_dir, exist_ok=True)

    # First .... cache all OpenPose CSV data per training lecture ....
    data_per_lecture = {}
    for lecture in training_set:
        lecture_filename = lecture_filename_prefix + lecture.title + ".csv"
        print("Loading data for: " + lecture_filename)

        segments, data = LecturePoseSegments.InitializeFromLectureFile(lecture_filename,
                                                                       normalization_bone, remove_confidence)

        data_per_lecture[lecture.title.lower()] = {
            "segments": segments,
            "data": data,
        }

    # read the training frame segments info file
    # (frame segment info of training data for the speaker object)
    segment_filename = segments_output_prefix + dataset_name + "_" + action_object_name + ".csv"
    speaker_seg_train = pd.read_csv(segment_filename)
    speaker_seg_train = speaker_seg_train.values

    # Split the OpenPose Data based on the given segments ...
    for vid_name, f_start, f_end, label in speaker_seg_train:
        vid_name = vid_name.lower()
        # print((vid_name, f_start, f_end, label))

        # if the label is not one of the valid actions, omit it
        if label not in unique_label:
            continue

        if vid_name not in data_per_lecture:
            print("Invalid lecture name found: " + vid_name)
            continue

        temp_data = data_per_lecture[vid_name]["data"][f_start:f_end + 1, :]
        temp_pose_segment_data = PoseSegmentData(f_start, f_end, label, temp_data)
        data_per_lecture[vid_name]["segments"].segments.append(temp_pose_segment_data)

    # save to file ...
    for lecture in training_set:
        output_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        MiscHelper.dump_save(data_per_lecture[lecture.title.lower()]["segments"], output_filename)

    print("Data Segment Saving Done.")
def computeVisualAlignment(m_videos, a_videos, time_offset, motionless, save_frames, extraction_method_id):
    # distribute the selection of motionless frames...
    selected = MiscHelper.distribute_values(Aligner.ALIGNMENT_SAMPLE, 0, len(motionless) - 1)

    # create the list..
    frame_list = []
    for idx in selected:
        frame_list.append(motionless[idx])

    # extract the motionless frames from the main videos
    frames = Loader.extractFramesRelative(m_videos, frame_list)
    if save_frames:
        for idx, f in enumerate(frames):
            abs_time, frame = f
            cv2.imwrite("out/main_" + str(idx) + ".jpg", frame)

    # calculate the absolute time for the corresponding frames on the
    # auxiliary video. Consider the time difference between videos
    times = [(abs_time - time_offset) for abs_time, frame in frames]

    # extract the motionless frames from the auxiliary videos
    aux_frames = Loader.extractFramesAbsolute(a_videos, times)
    if save_frames:
        for idx, frame in enumerate(aux_frames):
            cv2.imwrite("out/auxiliar_" + str(idx) + ".jpg", frame)

    # find the visual correspondence between pairs of key frames
    matches_aux = []
    matches_main = []
    aux_boxes = []
    main_boxes = []
    all_content_main = []
    all_content_aux = []

    # ...first... extract the content from each pair of frames...
    for i in range(min(Aligner.ALIGNMENT_SAMPLE, len(frames))):
        # get the current key frames
        abs_time, frame_main = frames[i]
        frame_aux = aux_frames[i]

        print("Extracting content #" + str(i + 1) + " ... (Main: " + TimeHelper.stampToStr(abs_time) +
              " - Aux: " + TimeHelper.stampToStr(times[i]) + ")")

        # from the main key frame, extract content on the board
        main_box, content_main = Binarizer.frameContentBinarization(frame_main, extraction_method_id)
        main_boxes.append(main_box)

        # from the auxiliary key frame, extract content on the board
        aux_box, content_aux = Binarizer.frameContentBinarization(frame_aux, extraction_method_id)
        aux_boxes.append(aux_box)

        # add to list...
        all_content_main.append(content_main)
        all_content_aux.append(content_aux)

    # ...then, extract the alignment.... keep the highest score...
    all_scores = []
    for i in range(min(Aligner.ALIGNMENT_SAMPLE, len(frames))):
        print("Testing Alignment #" + str(i + 1) + " ... ")

        # corresponding frames....
        content_aux = all_content_aux[i]
        content_main = all_content_main[i]

        # Extract a set of good matches between these two images....
        # where object = aux content from mimio, to align with main content
        #       scene = main content to which the change regions will be projected
        aux_list, main_list = VisualAlignment.getSURFMatchingPoints(content_aux, content_main,
                                                                    Aligner.SURF_THRESHOLD)

        # generate projection based on these points...
        current_projection, mask = VisualAlignment.generateProjection(aux_list, main_list)

        # calculate score...
        score = VisualAlignment.getProjectionScore(current_projection, all_content_main, all_content_aux)
        # print(str(i) + " => " + str(score))
        all_scores.append((score, i, current_projection))

        # add to the total list of points...
        matches_aux.append(aux_list)
        matches_main.append(main_list)
        # print("ON " + str(i) + " were found " + str(len(aux_list)) + " matches")

    all_scores = sorted(all_scores, reverse=True)

    # the current best projection is the one with the top score...
    max_score = all_scores[0][0]
    all_matches_aux = matches_aux[all_scores[0][1]]
    all_matches_main = matches_main[all_scores[0][1]]
    best_projection = all_scores[0][2]

    # now, try to improve the quality of the projection by adding some keypoints from
    # candidate alignments with high scores and computing a new combined projection
    # for the list of combined keypoint matches...
    new_score = max_score
    pos = 1
    while new_score >= max_score and pos < len(all_scores):
        # add keypoints to the combined list...
        current_aux = all_matches_aux + matches_aux[all_scores[pos][1]]
        current_main = all_matches_main + matches_main[all_scores[pos][1]]

        # generate the new projection...
        current_projection, mask = VisualAlignment.generateProjection(current_aux, current_main)

        # get the score for the combined projection...
        new_score = VisualAlignment.getProjectionScore(current_projection, all_content_main, all_content_aux)

        # check if the score improved...
        if new_score >= max_score:
            # new best projection found....
            max_score = new_score
            all_matches_aux += matches_aux[all_scores[pos][1]]
            all_matches_main += matches_main[all_scores[pos][1]]
            best_projection = current_projection

        pos += 1

    # Get the final alignment
    projection = best_projection
    print("Best Alignment Score: " + str(max_score))

    """
    # Un-comment to output alignment images
    for i in range(len(all_content_main)):
        content_main = all_content_main[i]
        content_aux = all_content_aux[i]

        proj_img = np.zeros((content_main.shape[0], content_main.shape[1]), dtype=content_main.dtype)
        cv.WarpPerspective(cv.fromarray(content_aux), cv.fromarray(proj_img), cv.fromarray(projection))

        result_image = np.zeros((content_main.shape[0], content_main.shape[1], 3))
        result_image[:, :, 2] = content_main
        result_image[:, :, 1] = proj_img

        # cv2.imshow('img', result_image)
        cv2.imwrite('DEBUG_MAIN_' + str(i) + '.bmp', content_main)
        cv2.imwrite('DEBUG_AUX_' + str(i) + '.bmp', content_aux)
        cv2.imwrite('DEBUG_PROJECTION_' + str(i) + '.bmp', result_image)
    """

    # average of the boxes of the whiteboard
    main_box = MiscHelper.averageBoxes(main_boxes)
    aux_box = MiscHelper.averageBoxes(aux_boxes)

    # store them in a single object...
    visual_alignment = VisualAlignment()
    # ... main size...
    visual_alignment.main_width = frames[0][1].shape[1]
    visual_alignment.main_height = frames[0][1].shape[0]
    # ... main board box ...
    visual_alignment.main_box = main_box
    # ... aux size ....
    visual_alignment.aux_width = aux_frames[0].shape[1]
    visual_alignment.aux_height = aux_frames[0].shape[0]
    # ... aux board box...
    visual_alignment.aux_box = aux_box
    # ... projection ....
    visual_alignment.projection = projection

    return visual_alignment
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])
    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    action_class_output_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    os.makedirs(action_class_output_dir, exist_ok=True)
    os.makedirs(action_class_probabilities_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    # load the saved model for action classification ...
    classifier_dir = output_dir + "/" + config.get_str("SPEAKER_ACTION_CLASSIFIER_DIR")
    classifier_filename = classifier_dir + "/" + config.get_str("SPEAKER_ACTION_CLASSIFIER_FILENAME")
    clf = MiscHelper.dump_load(classifier_filename)

    csv_col = ['frame_start', 'frame_end', 'prediction']
    for lecture in testing_set:
        input_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        output_actions_filename = action_class_output_dir + "/" + database.name + "_" + lecture.title + ".csv"
        output_proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + lecture.title + ".csv"

        # load data ...
        data_xy = MiscHelper.dump_load(input_filename)

        # classifier prediction ....
        test_x = data_xy["features"]
        y_pred = clf.predict(test_x)
        y_pred_re = y_pred.reshape((y_pred.shape[0], 1))

        # save prediction results
        output_csv = ResultRecorder(output_actions_filename)
        output_csv.write_headers(csv_col)
        # the function accepts a list of columns to save on CSV ...
        # by transposing, we turn the standard list of rows into a list of columns for the function ...
        paras = np.hstack((data_xy["frame_infos"], y_pred[:, None])).transpose()
        output_csv.record_results(paras)

        # save label probabilities
        all_classes = clf.classes_
        y_prob = clf.predict_proba(test_x)
        infos = np.concatenate((y_pred_re, y_prob), axis=1)

        output_csv = ResultRecorder(output_proba_filename)
        output_csv.write_headers(csv_col + all_classes.tolist())
        # ... IDEM ....
        paras = np.hstack((data_xy["frame_infos"], infos)).transpose()
        output_csv.record_results(paras)
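# Minimal sketch of the transpose trick noted above (assumes the ResultRecorder
# semantics described in the comments: record_results takes a list of columns).
# Stacking per-segment rows and transposing yields one array per CSV column:
import numpy as np

frame_infos = np.array([[0, 14], [15, 29]], dtype=object)  # frame_start, frame_end
y_pred = np.array(["write", "erase"], dtype=object)        # toy predictions
paras = np.hstack((frame_infos, y_pred[:, None])).transpose()
print(paras)
# [[0 15]
#  [14 29]
#  ['write' 'erase']]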
def ExportVideo(database, lecture, binary_source, video_prefix, invert_binary,
                skip_interpolation=0, workers=7, block_size=100):
    # .... paths ...
    lecture_suffix = str(lecture.id) + ".dat"
    tempo_binary_filename = database.output_temporal + "/" + binary_source + lecture_suffix

    lecture_str = video_prefix + "_" + database.name + "_" + lecture.title.lower()
    temporary_prefix = database.output_images + "/" + lecture_str + "_"

    first_video_filename = lecture.main_videos[0]["path"]

    # load binary images
    binary_data = MiscHelper.dump_load(tempo_binary_filename)
    original_frame_times, frame_indices, frame_compressed = binary_data

    print("Generating Temporary Files")
    with ThreadPoolExecutor(max_workers=workers) as executor:
        n_blocks = int(math.ceil(frame_indices[-1] / block_size))
        pref_list = [temporary_prefix] * n_blocks
        frame_list = [frame_compressed] * n_blocks
        idx_list = [frame_indices] * n_blocks
        inv_list = [invert_binary] * n_blocks
        start_list = [idx * block_size for idx in range(n_blocks)]
        block_list = [block_size] * n_blocks
        skip_list = [skip_interpolation] * n_blocks

        total_frames = n_blocks * block_size
        for idx, _ in enumerate(executor.map(KeyframeExporter.ExpandGenerateSaveTemp,
                                             pref_list, frame_list, idx_list, inv_list,
                                             start_list, block_list, skip_list)):
            prc_progress = ((idx + 1) * 100) / n_blocks
            print("-> Exporting: {0:.4f}% (Block {1:d} of {2:d})".format(prc_progress, idx + 1, n_blocks),
                  end="\r", flush=True)
        print("", flush=True)

    # find source sampling frames per second
    capture = cv2.VideoCapture(first_video_filename)
    video_fps = capture.get(cv2.CAP_PROP_FPS)

    source_videos_str = " ".join(["-i " + video["path"] for video in lecture.main_videos])
    audio_filter_complex = " ".join(["[{0:d}:a:0]".format(idx + 1)
                                     for idx in range(len(lecture.main_videos))])
    audio_filter_complex += " concat=n={0:d}:v=0:a=1 [audio]".format(len(lecture.main_videos))

    video_output = database.output_videos + "/" + lecture_str + ".mp4"

    input_framerate = video_fps
    output_framerate = video_fps

    video_inputs = "-hwaccel dxva2 -framerate {0:.2f} -start_number 0 -i {1:s}%d.png".format(
        input_framerate, temporary_prefix)
    audio_inputs = "{0:s} -filter_complex \"{1:s}\"".format(source_videos_str, audio_filter_complex)
    output_flags = "-pix_fmt yuv420p -vf \"scale=trunc(iw/2)*2:trunc(ih/2)*2\" "
    # note: -c:v is an alias of -codec:v, so the later libx264 overrides mpeg4 here
    output_flags += "-s:v 1920x1080 -codec:v mpeg4 -c:v libx264 -r {0:s} -shortest".format(str(output_framerate))

    export_command = "ffmpeg -y {0:s} {1:s} -map 0:0 -map \"[audio]\" {2:s} {3:s}"
    export_command = export_command.format(video_inputs, audio_inputs, output_flags, video_output)

    # generate video using ffmpeg ....
    print("Saving data to: " + video_output)
    print(export_command)
    os.system(export_command)

    # delete temporary images
    print("Deleting Temporary Files")
    for idx in range(total_frames):
        os.remove(temporary_prefix + str(idx) + ".png")
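# Example of the assembled command (illustrative paths; one source video at
# 30 FPS). The format strings above expand to roughly:
#
#   ffmpeg -y -hwaccel dxva2 -framerate 30.00 -start_number 0 -i <prefix>%d.png \
#          -i lecture_01.mp4 -filter_complex "[1:a:0] concat=n=1:v=0:a=1 [audio]" \
#          -map 0:0 -map "[audio]" -pix_fmt yuv420p \
#          -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" -s:v 1920x1080 \
#          -codec:v mpeg4 -c:v libx264 -r 30.0 -shortest <output>.mp4
#
# i.e., the rendered image sequence becomes video stream 0, and the source
# videos' audio tracks are concatenated into the single [audio] stream.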
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    video_metadata_dir = output_dir + "/" + config.get_str("SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    output_bboxes_dir = output_dir + "/" + config.get("SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    temporal_segments_dir = output_dir + "/" + config.get("SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    os.makedirs(temporal_segments_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    for current_lecture in testing_set:
        info_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + current_lecture.title + ".csv"

        video_info = MiscHelper.dump_load(info_filename)
        segmenter = VideoSegmenter.FromConfig(config, video_info["width"], video_info["height"])

        # read label data ....
        prob_info = ResultReader.read_actions_probabilities_file(proba_filename, valid_actions)
        segments, gt_actions, pred_actions, prob_actions = prob_info

        # read bbox data ...
        bbox_filename = output_bboxes_dir + "/" + database.name + "_" + current_lecture.title + ".csv"
        frame_idxs, frame_actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(bbox_filename,
                                                                                        use_ground_truth)

        # (splits_frames, video_keyframes)
        video_data = segmenter.get_keyframes(pred_actions, segments, frame_idxs, body_bboxes, rh_bboxes)

        print("")
        print("video key_frames")
        print(video_data[0])
        print(video_data[1])
        print("")

        output_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(video_data, output_filename)