# NOTE: these examples assume the AccessMath project is importable; project-specific
# helpers (Configuration, MetaDataDB, MiscHelper, Parameters, ResultReader,
# ResultRecorder, and the pose / binarization classes used below) come from its packages.
# Standard and third-party imports shared across the examples:
import os
import pickle
import sys
import time
from concurrent.futures import ProcessPoolExecutor

import cv2
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier


def get_video_files(argvs):
    try:
        database = MetaDataDB.from_file(argvs[1])
    except:
        print("Invalid database file")
        return None, None, None

    # now search for specified lecture
    lecture_name = argvs[2].lower()

    current_lecture = None
    for lecture in database.lectures:
        if lecture.title.lower() == lecture_name:
            current_lecture = lecture
            break

    if current_lecture is None:
        print("Lecture not found in database")
        print("Available lectures:")
        for lecture in database.lectures:
            print(lecture.title)
        return None, None, None

    m_videos = [video["path"] for video in current_lecture.main_videos]

    return m_videos, database, current_lecture
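
# hypothetical usage sketch, mirroring sys.argv (file names are placeholders):
#   m_videos, database, lecture = get_video_files(["script.py", "db.xml", "lecture_01"])
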
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [dataset]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tdataset:\tDataset to run (Default= Training)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    video_metadata_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    os.makedirs(video_metadata_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    for current_lecture in testing_set:
        print("")
        print("processing: " + current_lecture.title)

        # the simple frame sampling worker ..
        worker = SimpleFrameSampler()

        # main video file names
        m_videos = [
            config.get_str("VIDEO_FILES_PATH") + "/" + video["path"]
            for video in current_lecture.main_videos
        ]

        video_info = {}
        if "forced_width" in current_lecture.parameters:
            video_info["width"] = current_lecture.parameters["forced_width"]
            video_info["height"] = current_lecture.parameters["forced_height"]
        else:
            # execute the actual process ....
            processor = SequentialVideoSampler(m_videos, [0])
            processor.doProcessing(worker, 0, True)  # 0

            video_info["width"] = worker.width
            video_info["height"] = worker.height

        output_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(video_info, output_filename)

# Example 3

def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    normalization_bone = config.get("SPEAKER_NORMALIZATION_BONE")  # pair of norm factor points

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")

    # the per lecture openpose CSV
    lecture_filename_prefix = output_dir + "/" + config.get_str("OPENPOSE_OUTPUT_DIR_CSV") + "/" + database.name + "_"

    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    os.makedirs(output_segment_dir, exist_ok=True)

    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH")

    for lecture in testing_set:
        lecture_filename = lecture_filename_prefix + lecture.title + ".csv"
        print("Loading data for: " + lecture_filename)

        # get the corresponding data for this lecture ...
        lec_segments, lecture_data = LecturePoseSegments.InitializeFromLectureFile(lecture_filename, normalization_bone,
                                                                                   remove_confidence)

        # sequential sampling for pose segments
        vid_len = lecture_data.shape[0]
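        # chop the pose stream into consecutive, non-overlapping segments, e.g. with
        # segment_length = 15: frames [0, 14], [15, 29], ... (a trailing partial segment is dropped)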
        for ind in range(0, int(vid_len / segment_length)):
            f_start = ind * segment_length
            f_end = f_start + segment_length - 1
            temp_data = lecture_data[f_start:f_end + 1, :]

            lec_segments.segments.append(PoseSegmentData(f_start, f_end, None, temp_data))

        # save ....
        output_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        MiscHelper.dump_save(lec_segments, output_filename)

def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    classifier_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_DIR")
    os.makedirs(classifier_dir, exist_ok=True)
    classifier_filename = classifier_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_FILENAME")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]
    training_titles = [lecture.title.lower() for lecture in training_set]

    # get classifier parameters
    rf_n_trees = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_TREES", 64)
    rf_depth = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_DEPTH", 16)

    # read all training data available ....
    train_dataset = {}
    for lecture in training_set:
        input_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        train_dataset[lecture.title.lower()] = MiscHelper.dump_load(
            input_filename)

    train_x, train_y, train_frame_infos = PoseFeatureExtractor.combine_datasets(
        training_titles, train_dataset)

    # classify and confusion matrix part
    clf = RandomForestClassifier(n_estimators=rf_n_trees,
                                 max_depth=rf_depth,
                                 random_state=0)
    clf = clf.fit(train_x, train_y)

    MiscHelper.dump_save(clf, classifier_filename)

# Example 5

    @staticmethod  # assumed: prepare takes no self, so it is presumably a static method
    def prepare(args):
        # load database
        try:
            database = MetaDataDB.from_file(args['database'])
        except:
            print("Invalid AccessMath database file")
            return

        TangentV_Helper.VisualizerServer = database.indexing.visualization_server

        # ... Load 3D structures ....
        print("Loading CC indices per lecture ... ")
        for lecture in database.lectures:
            struct_filename = database.output_temporal + '/' + Parameters.Output_ST3D + str(lecture.id) + ".dat"
            TangentV_Helper.cache_3DSTs[lecture.title] = MiscHelper.dump_load(struct_filename)

    def initialize(self):
        # load database info
        try:
            self.database = MetaDataDB.from_file(self.database_file)
        except:
            print("Invalid database file")
            return False

        self.params = MiscHelper.optional_parameters(self.raw_params, 0)
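        # self.params maps single-letter flags to values; a hypothetical example:
        #   {"d": ["training"], "l": ["lecture_01"], "i": "in_prefix", "o": "out_prefix"}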

        # process the specified dataset(s)
        if "d" in self.params:
            if not isinstance(self.params["d"], list):
                self.params["d"] = [self.params["d"]]

            valid_datasets = []
            for name in self.params["d"]:
                dataset = self.database.get_dataset(name)

                if dataset is None:
                    print("Invalid Dataset name <" + name + ">")
                    return False
                else:
                    valid_datasets.append(dataset)

            self.params["d"] = valid_datasets

        # process only the specified lectures
        if "l" in self.params:
            if not isinstance(self.params["l"], list):
                self.params["l"] = [self.params["l"]]

            self.params["l"] = [name.lower() for name in self.params["l"]]

        # override the input prefix
        if "i" in self.params:
            self.input_temp_prefix = self.params["i"]

        # override the output prefix
        if "o" in self.params:
            self.input_temp_prefix = self.params["o"]

        self.temp_dir = self.database.output_temporal
        self.out_dir = self.database.output_preprocessed
        self.img_dir = self.database.output_images

        # success loading database file ..
        return True

def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]

    # prepare the feature extractor ...
    feature_points = config.get("SPEAKER_ACTION_FEATURE_POINTS")
    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH", 15)
    feat_extractor = PoseFeatureExtractor(feature_points, segment_length)

    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    os.makedirs(features_dir, exist_ok=True)

    # for each file ... get features ...
    for lecture in training_set:
        input_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        output_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"

        lecture_pose_segments = MiscHelper.dump_load(input_filename)

        vid_data = feat_extractor.get_feature_dataset(lecture_pose_segments)

        MiscHelper.dump_save(vid_data, output_filename)

    return

# Example 8

def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("\n\tWhere:")
        print("\tgt_labels:\t(Optional) Set to 1 to use Ground Truth labels instead of predictions")
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")

    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    os.makedirs(output_bboxes_dir, exist_ok=True)

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    speaker_right_handed = config.get("SPEAKER_IS_RIGHT_HANDED")

    n_joints_body = 25  # OpenPose BODY_25 keypoints
    n_joints_hand = 21  # OpenPose hand-model keypoints

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    col_name = [
        'frame_id', ('ground_truth' if use_ground_truth else 'pred_label'),
        'body_xmin', 'body_xmax', 'body_ymin', 'body_ymax', 'rh_xmin',
        'rh_xmax', 'rh_ymin', 'rh_ymax'
    ]

    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH")

    # load data + label
    for lecture in testing_set:
        input_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        lec_segments = MiscHelper.dump_load(input_filename)

        if use_ground_truth:
            labels = lec_segments.get_all_labels()
        else:
            input_proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + lecture.title + ".csv"
            _, _, labels, _ = ResultReader.read_actions_probabilities_file(
                input_proba_filename, valid_actions)

        output_filename = output_bboxes_dir + "/" + database.name + "_" + lecture.title + ".csv"
        output_file = ResultRecorder(output_filename)
        output_file.write_headers(col_name)

        # get bbox for skeleton and right hands from all segments
        frames = []
        segment_labels = []
        body_bbox = []
        rh_bbox = []
        for ind in range(0, len(lec_segments.segments)):
            # get the pose data ...
            if not remove_confidence:
                # the data contains confidence ... which needs to be removed at this point ...
                base_pose_data = lec_segments.segments[ind].pose_data

                total_joints = n_joints_body + n_joints_hand * 2
                seg_pose_data = np.zeros(
                    (base_pose_data.shape[0], total_joints * 2),
                    dtype=base_pose_data.dtype)

                seg_pose_data[:, ::2] = base_pose_data[:, ::3]
                seg_pose_data[:, 1::2] = base_pose_data[:, 1::3]
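                # raw rows hold (x, y, confidence) triplets per joint:
                #   [x0, y0, c0, x1, y1, c1, ...] -> [x0, y0, x1, y1, ...]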
            else:
                # confidence has been removed ....
                seg_pose_data = lec_segments.segments[ind].pose_data

            body_features = seg_pose_data[:, 0:n_joints_body * 2]
            if speaker_right_handed:
                # use the right-hand joints
                rh_features = seg_pose_data[:, (n_joints_body + n_joints_hand) * 2:]
            else:
                # use the left-hand joints
                rh_features = seg_pose_data[:, n_joints_body * 2:(n_joints_body + n_joints_hand) * 2]

            # get body bboxes and add to the list ....
            temp_body_bbox = PoseSegmentData.get_bbox_frame_data(
                body_features, 2)
            body_bbox += temp_body_bbox.tolist()

            # get hand bboxes and add to the list ....
            temp_rh_bbox = PoseSegmentData.get_bbox_frame_data(rh_features, 2)
            rh_bbox += temp_rh_bbox.tolist()

            # add frame range ....
            f_start = lec_segments.segments[ind].frame_start
            f_end = lec_segments.segments[ind].frame_end
            temp_frames = list(range(f_start, f_end + 1))
            frames += temp_frames

            # add label ....
            temp_label = [[labels[ind]] for _ in range(segment_length)]
            segment_labels += temp_label

        paras = frames, segment_labels, body_bbox, rh_bbox
        output_file.record_results(paras)

# Example 9

def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    video_metadata_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str(
        "SPEAKER_FG_ESTIMATION_MASK_DIR")
    os.makedirs(fg_mask_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    speaker_exp_factor = config.get_float(
        "SPEAKER_FG_ESTIMATION_SPK_EXPANSION_FACTOR")
    min_mask_frames = config.get_int("SPEAKER_FG_ESTIMATION_MIN_MASK_FRAMES")
    mask_exp_radius = config.get_int(
        "SPEAKER_FG_ESTIMATION_MASK_EXPANSION_RADIUS")

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    for current_lecture in testing_set:
        bbox_filename = output_bboxes_dir + "/" + database.name + "_" + current_lecture.title + ".csv"
        frame_idxs, actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(
            bbox_filename, use_ground_truth)

        info_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_info = MiscHelper.dump_load(info_filename)

        fg_estimator = ForegroundEstimator(video_info["width"],
                                           video_info["height"],
                                           speaker_exp_factor, min_mask_frames,
                                           mask_exp_radius)

        fg_mask = fg_estimator.get_mask(frame_idxs, actions, body_bboxes,
                                        rh_bboxes)

        # cv2.imshow(current_lecture.id, fg_mask)
        # cv2.waitKey()

        flag, raw_data = cv2.imencode(".png", fg_mask)

        output_filename = fg_mask_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(raw_data, output_filename)

def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")

    action_class_output_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    os.makedirs(action_class_output_dir, exist_ok=True)
    os.makedirs(action_class_probabilities_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    # load the saved model for action classification ...
    classifier_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_DIR")
    classifier_filename = classifier_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_FILENAME")
    clf = MiscHelper.dump_load(classifier_filename)

    csv_col = ['frame_start', 'frame_end', 'prediction']

    for lecture in testing_set:
        input_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        output_actions_filename = action_class_output_dir + "/" + database.name + "_" + lecture.title + ".csv"
        output_proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + lecture.title + ".csv"

        # load data ...
        data_xy = MiscHelper.dump_load(input_filename)

        # classier predict ....
        test_x = data_xy["features"]
        y_pred = clf.predict(test_x)
        y_pred_re = y_pred.reshape((y_pred.shape[0], 1))

        # save prediction result
        output_csv = ResultRecorder(output_actions_filename)
        output_csv.write_headers(csv_col)

        # the function accepts a list of columns to save on CSV ...
        # by transposing, we make the standard list of rows into a list of columns for the function ...
        paras = np.hstack((data_xy["frame_infos"], y_pred[:,
                                                          None])).transpose()
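        # shape sketch (assuming frame_infos is (N, 2) holding frame_start / frame_end):
        #   hstack((N, 2), (N, 1)) -> (N, 3); transpose -> (3, N), one row per CSV column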
        output_csv.record_results(paras)

        # save label probabilities
        all_classes = clf.classes_
        y_prob = clf.predict_proba(test_x)
        infos = np.concatenate((y_pred_re, y_prob), axis=1)
        output_csv = ResultRecorder(output_proba_filename)
        output_csv.write_headers(csv_col + all_classes.tolist())
        # ... IDEM ....
        paras = np.hstack((data_xy["frame_infos"], infos)).transpose()
        output_csv.record_results(paras)

# Example 11

def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    export_prefix = output_dir + "/" + database.output_annotations + "/" + database.name + "_"

    action_object_name = config.get_str("SPEAKER_ACTION_MAIN_OBJECT",
                                        "speaker")
    action_segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH", 15)
    action_segment_sampling = config.get_int(
        "SPEAKER_ACTION_SEGMENT_SAMPLING_MODE", 2)  # MODE!!
    action_segment_tracks = config.get_int(
        "SPEAKER_ACTION_SEGMENT_SAMPLING_TRACKS", 4)
    action_segment_output_dir = config.get_str(
        "SPEAKER_ACTION_SEGMENT_OUTPUT_DIR", ".")

    segments_output_prefix = output_dir + "/" + action_segment_output_dir + "/" + database.name + "_"
    os.makedirs(output_dir + "/" + action_segment_output_dir, exist_ok=True)

    sampler = ActionSegmentSampling(action_segment_sampling,
                                    action_segment_length,
                                    action_segment_tracks)

    # for each data set ...
    for dataset_name in database.datasets:
        print("Processing data set: " + dataset_name)
        # get segments ...
        all_dataset_segments = []
        for current_lecture in database.datasets[dataset_name]:
            exported_data_filename = export_prefix + current_lecture.title.lower() + "_" + action_object_name + ".csv"
            print(" - input file: " + exported_data_filename)

            if not os.path.exists(exported_data_filename):
                print("\tWARNING: File not found!")
                continue

            # call here the sampler ....
            lecture_title = current_lecture.title.lower()
            lecture_segments = sampler.sample_from_file(
                exported_data_filename, lecture_title)

            all_dataset_segments += lecture_segments

        # prepare text lines ...
        output_lines = ["lecture_title,frame_start,frame_end,action\n"]
        for segment in all_dataset_segments:
            output_lines.append(",".join([str(value)
                                          for value in segment]) + "\n")

        # save segments for dataset ....
        output_filename = segments_output_prefix + dataset_name + "_" + action_object_name + ".csv"
        with open(output_filename, "w") as out_file:
            out_file.writelines(output_lines)

        print(" - data saved to: " + output_filename)

    print("Process complete!")

# Example 12

def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_KEYFRAMES_DIR")
    os.makedirs(keyframes_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    for current_lecture in testing_set:
        # read segment data ....
        input_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_segment_data = MiscHelper.dump_load(input_filename)

        # key-frames that must be extracted from video ...
        segments, keyframes_per_segment = video_segment_data
        all_keyframes = []
        for segment_keyframes in keyframes_per_segment:
            all_keyframes += [
                keyframe_idx for keyframe_idx, bbox in segment_keyframes
            ]

        print("")
        print("processing: " + current_lecture.title)
        # print(all_keyframes)

        # the simple frame sampling worker ..
        worker = SimpleFrameSampler()

        # main video file names
        m_videos = [
            config.get_str("VIDEO_FILES_PATH") + "/" + video["path"]
            for video in current_lecture.main_videos
        ]

        # execute the actual process ....
        processor = SequentialVideoSampler(m_videos, all_keyframes)

        if "forced_width" in current_lecture.parameters:
            processor.force_resolution(
                current_lecture.parameters["forced_width"],
                current_lecture.parameters["forced_height"])
        processor.doProcessing(worker, 0, True)  # 0

        sampled_frame_data = worker.frame_times, worker.frame_indices, worker.compressed_frames

        # save results
        keyframes_data_filename = keyframes_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(sampled_frame_data, keyframes_data_filename)

# Example 13

def main():
    if len(sys.argv) < 2:
        print("Usage:")
        print("\tpython train_ml_binarizer.py config [force_update] [classifier_file] [patch_size]")
        print("")
        print("Where")
        print("\tconfig\t\t\tPath to config file")
        print("\tforce_update \t\tOptional, force to update the sampled Patch file")
        print("\tclassifier_file \tOptional, force classifier path diff. from Config")
        print("\tpatch_size \t\tOptional, override patch size")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    # load the database
    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid database file")
        return

    # <Parameters>
    run_crossvalidation = config.get("ML_BINARIZER_TRAIN_RUN_CROSSVALIDATION", True)

    if not config.contains("ML_BINARIZER_PATCHES_FILENAME"):
        print("Must specificy a file to store sampled patches")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    ml_binarizer_dir = output_dir + "/" + config.get_str("ML_BINARIZER_DIR")
    patch_filename = ml_binarizer_dir + "/" + config.get_str("ML_BINARIZER_PATCHES_FILENAME")

    # For debugging/comparison, use OTSU binarization
    OTSU_mode = config.get("ML_BINARIZER_TRAIN_OTSU_MODE", False)

    # baseline_mode = True  # train a Random Forest instead
    retrain_classifier = config.get("ML_BINARIZER_TRAIN_RETRAIN", True)

    if not config.get("ML_BINARIZER_OVERRIDE_PARAMETERS", False):
        # Sampling mode #1: Distribution of proportions
        #
        # of 100% pixels, we sample fg_proportion from GT Foreground pixels (handwriting pixels)
        #    handwriting pixels = fg_proportion
        #    all background     = (1 - fg_proportion)
        #
        # The remaining background pixels are sampled as close or far from foreground
        #    Close to Foreground pixels = (1 - fg_proportion) * bg_close_prop
        #    Remaining background pixels = (1 - fg_proportion) * (1 - bg_close_prop)
        #
        # The last proportion of pixels can be obtained from whiteboard or background objects, we separate them as
        #    Not Close Whiteboard background pixels = (1 - fg_proportion) * (1 - bg_close_prop) * bg_board_prop
        #    Not Whiteboard background pixels       = (1 - fg_proportion) * (1 - bg_close_prop) * (1 - bg_board_prop)
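        #
        # Worked example (hypothetical, using the config defaults read in the else-branch below:
        #   fg_proportion = 0.5, bg_close_prop = 0.9, bg_board_prop = 1.0):
        #     handwriting             = 0.5                   -> 50% of samples
        #     close-to-foreground bg  = (1 - 0.5) * 0.9       -> 45%
        #     not-close whiteboard bg = (1 - 0.5) * 0.1 * 1.0 ->  5%
        #     non-whiteboard bg       = (1 - 0.5) * 0.1 * 0.0 ->  0%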
        #
        # Sampling mode #2: Distribution of proportions
        #
        # of 100% pixels, we sample fg_proportion from GT Foreground pixels (handwriting pixels)
        #    handwriting pixels = fg_proportion
        #    all background     = (1 - fg_proportion)
        #
        # The remaining background pixels are sampled by the average intensity of their window: the greater the
        #   average, the more likely they are to be sampled. This is consistent with sampling mode 1, but is less
        #   discrete and requires fewer parameters
        sampling_mode = Parameters.MLBin_sampling_mode

        patch_size = Parameters.MLBin_patch_size
        patches_per_frame = Parameters.MLBin_sampling_patches_per_frame
        fg_proportion = Parameters.MLBin_sampling_fg_proportion
        bg_close_prop = Parameters.MLBin_sampling_bg_close_prop
        bg_board_prop = Parameters.MLBin_sampling_bg_board_prop

        mlbin_sigma_color = Parameters.MLBin_sigma_color
        mlbin_sigma_space = Parameters.MLBin_sigma_space
        mlbin_median_blur_k = Parameters.MLBin_median_blur_k
        mlbin_dark_background = Parameters.MLBin_dark_background

        feature_workers = Parameters.MLBin_train_workers

        # Random Forest
        rf_n_trees = Parameters.MLBin_rf_n_trees           # 16
        rf_max_depth = Parameters.MLBin_rf_max_depth       # 12
        rf_max_features = Parameters.MLBin_rf_max_features # 32

    else:
        print("Reading ML Binarizer parameters from config ...")

        sampling_mode = config.get_int("ML_BINARIZER_SAMPLING_MODE", 2)

        patch_size = config.get_int("ML_BINARIZER_PATCH_SIZE", 7)
        patches_per_frame = config.get_int("ML_BINARIZER_SAMPLING_PATCHES_PER_FRAME", 20000)
        fg_proportion = config.get_float("ML_BINARIZER_SAMPLING_FG_PROPORTION", 0.5)
        bg_close_prop = config.get_float("ML_BINARIZER_SAMPLING_BG_CLOSE_PROPORTION", 0.9)
        bg_board_prop = config.get_float("ML_BINARIZER_SAMPLING_BG_BOARD_PROPORTION", 1.0)

        mlbin_sigma_color = config.get_float("ML_BINARIZER_SIGMA_COLOR", 13.5)
        mlbin_sigma_space = config.get_float("ML_BINARIZER_SIGMA_SPACE", 4.0)
        mlbin_median_blur_k = config.get_int("ML_BINARIZER_MEDIAN_BLUR_K", 33)
        mlbin_dark_background = config.get("ML_BINARIZER_DARK_BACKGROUND")

        feature_workers = config.get_int("ML_BINARIZER_TRAIN_WORKERS", 7)

        # Random Forest
        rf_n_trees = config.get_int("ML_BINARIZER_RF_N_TREES", 16)  # 16
        rf_max_depth = config.get_int("ML_BINARIZER_RF_MAX_DEPTH", 12)  # 12
        rf_max_features = config.get_int("ML_BINARIZER_RF_MAX_FEATURES", 32) # 32


    if len(sys.argv) >= 4:
        # user specified location
        classifier_file = sys.argv[3]
    else:
        # by default, store at the place specified in the configuration or parameters file ...
        if not config.get("ML_BINARIZER_OVERRIDE_PARAMETERS", False):
            classifier_file = Parameters.MLBin_classifier_file
        else:
            classifier_file = ml_binarizer_dir + "/" + config.get_str("ML_BINARIZER_CLASSIFIER_FILENAME")

    feature_function = get_patch_features_raw_values

    # </Parameters>

    if len(sys.argv) >= 3:
        try:
            force_update = int(sys.argv[2]) > 0
        except:
            print("Invalid value for force_udpate")
            return
    else:
        force_update = False

    if len(sys.argv) >= 5:
        try:
            patch_size = int(sys.argv[4])
        except:
            print("Invalid value for patch_size")
            return

    assert (patch_size - 1) % 2 == 0  # patch size must be odd
    bg_close_neighborhood = int((patch_size - 1) / 2) + 1
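    # e.g., the default patch_size = 7 gives bg_close_neighborhood = (7 - 1) / 2 + 1 = 4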
    
    print("Classifier Path: " + classifier_file)
    ml_binarizer = MLBinarizer(None, patch_size, mlbin_sigma_color, mlbin_sigma_space, mlbin_median_blur_k,
                               mlbin_dark_background)

    print("... loading data ...")
    start_loading = time.time()
    all_keyframes, binarized_keyframes = load_keyframes(output_dir, database)
    fake_unique_groups, fake_cc_group, fake_segments = generate_fake_keyframe_info(all_keyframes)

    print("Total Training keyframes: " + str(len(all_keyframes)))

    end_loading = time.time()
    start_preprocessing = time.time()

    print("Pre-processing key-frames", flush=True)
    all_preprocessed = []
    for kf_idx, kf in enumerate(all_keyframes):
        all_preprocessed.append(ml_binarizer.preprocessing(kf.raw_image))
        # cv2.imwrite("DELETE_NOW_tempo_bin_input_" + str(kf_idx) + ".png", all_preprocessed[-1])

    end_preprocessing = time.time()
    start_patch_extraction = time.time()

    # Extracting/Loading patches used for training (only if not on OTSU's mode)
    if not OTSU_mode:
        # generate the patch-based training set ...
        # check if patch file exists ...
        if not os.path.exists(patch_filename) or force_update:
            print("Extracting patches...")

            if sampling_mode == 1:
                # SampleEdgeFixBg()
                patches = PatchSampling.SampleEdgeFixBg(all_keyframes, all_preprocessed, patch_size, patches_per_frame,
                                                        fg_proportion, bg_close_prop, bg_board_prop, bg_close_neighborhood)
            elif sampling_mode == 2:
                # SampleEdgeContBg
                patches = PatchSampling.SampleEdgeContBg(all_keyframes, all_preprocessed, patch_size, patches_per_frame,
                                                         fg_proportion)
            else:
                patches = (None, None)

            patches_images, patches_labels = patches

            # generate features
            print("\nGenerating features ...", flush=True)
            all_features = []
            with ProcessPoolExecutor(max_workers=feature_workers) as executor:
                for lect_idx, lecture_images in enumerate(patches_images):
                    print("Processing patches from lecture {0:d} out of {1:d}".format(lect_idx + 1, len(patches_images)))
                    lecture_features = []
                    for i, patch_features in enumerate(executor.map(feature_function, lecture_images)):
                        lecture_features.append(patch_features)

                    all_features.append(lecture_features)

            print("\nSaving patches and features to file")
            out_file = open(patch_filename, "wb")
            pickle.dump(patches_labels, out_file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(patches_images, out_file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(all_features, out_file, pickle.HIGHEST_PROTOCOL)
            out_file.close()
        else:
            # load patches from file ....
            print("Loading patches and features from file")
            in_file = open(patch_filename, "rb")
            patches_labels = pickle.load(in_file)
            patches_images = pickle.load(in_file)
            all_features = pickle.load(in_file)
            in_file.close()

    end_patch_extraction = time.time()
    
    total_training_time = 0.0
    total_binarization_time = 0.0
    total_evaluation_time = 0.0

    cross_validated_classifiers = []
    
    if not OTSU_mode:
        start_training = time.time()

        # train classifier using training patches ...
        count_all_patches = sum([len(lecture_images) for lecture_images in patches_images])
        print("Total patches available for training: " + str(count_all_patches))

        n_features = len(all_features[0][0])
        print("Total Features: " + str(n_features))

        # check local performance using cross-validation based on leaving one lecture out
        conf_matrix = np.zeros((2, 2), dtype=np.int32)
        avg_train_accuracy = 0.0
        rf_max_features = min(rf_max_features, n_features)

        if run_crossvalidation:
            for i in range(len(patches_images)):
                print("Cross-validation fold #" + str(i + 1))

                training_data = []
                training_labels = []
                testing_data = []
                testing_labels = []
                for k in range(len(patches_images)):
                    if i == k:
                        testing_data += all_features[k]
                        testing_labels += patches_labels[k]
                    else:
                        training_data += all_features[k]
                        training_labels += patches_labels[k]

                training_data = np.array(training_data)
                testing_data = np.array(testing_data)

                print("-> Training Samples: " + str(training_data.shape[0]))
                print("-> Testing Samples: " + str(testing_data.shape[0]))

                # classification mode ...
                # random forest ...
                classifier = RandomForestClassifier(rf_n_trees, max_features=rf_max_features, max_depth=rf_max_depth, n_jobs=-1)
                classifier.fit(training_data, training_labels)
                
                # keep reference to the n-th fold classifier
                cross_validated_classifiers.append(classifier)

                pred_labels = classifier.predict(training_data)
                train_conf_matrix = np.zeros((2, 2), dtype=np.int32)
                for train_idx in range(len(training_labels)):
                    train_conf_matrix[training_labels[train_idx], pred_labels[train_idx]] += 1
                pixel_accuracy = (train_conf_matrix[0, 0] + train_conf_matrix[1, 1]) / len(training_labels)
                print("-> Train pixel accuracy: " + str(pixel_accuracy * 100.0))
                avg_train_accuracy += pixel_accuracy

                pred_labels = classifier.predict(testing_data)

                for test_idx in range(len(testing_labels)):
                    conf_matrix[testing_labels[test_idx], pred_labels[test_idx]] += 1


            pixel_accuracy = (conf_matrix[0, 0] + conf_matrix[1, 1]) / count_all_patches
            avg_train_accuracy /= len(all_features)

            print("Combined testing confusion matrix: ")
            print(conf_matrix)
            print("Final training pixel accuracy: " + str(avg_train_accuracy * 100.0))
            print("Final testing pixel accuracy: " + str(pixel_accuracy * 100.0))

        # now, use all data to train a classifier for binarization of all frames ...
        if not os.path.exists(classifier_file) or force_update or retrain_classifier:
            print("Training classifier using all patches", flush=True)
            # classification
            training_data = []
            training_labels = []
            for k in range(len(patches_images)):
                training_data += all_features[k]
                training_labels += patches_labels[k]

            training_data = np.array(training_data)

            # Train Random Forest
            classifier = RandomForestClassifier(rf_n_trees, max_features=rf_max_features, max_depth=rf_max_depth, n_jobs=-1)
            classifier.fit(training_data, training_labels)
            
            print("Saving classifier to file")
            out_file = open(classifier_file, "wb")
            pickle.dump(classifier, out_file, pickle.HIGHEST_PROTOCOL)
            out_file.close()
        else:
            print("Loading classifier from file")
            in_file = open(classifier_file, "rb")
            classifier = pickle.load(in_file)
            in_file.close()

        # release memory (a lot) of elements that will not be used after this point ...
        all_features = None
        patches_labels = None
        training_data = None
        training_labels = None
        testing_data = None
        testing_labels = None

        end_training = time.time()
        total_training_time += end_training - start_training
    
    # binarize using parameter combination...
    start_binarizing = time.time()

    last_lecture = None
    lecture_offset = -1
    training_set = database.get_dataset("training")
    
    for idx, bin_kf in enumerate(binarized_keyframes):
        if bin_kf.lecture != last_lecture:
            last_lecture = bin_kf.lecture
            lecture_offset += 1

        print("binarizing kf #" + str(idx) + ", from " + training_set[lecture_offset].title, end="\r", flush=True)

        if OTSU_mode:
            # ideal BG removal ...
            #strel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(patch_size), int(patch_size)))
            #bg_mask = all_keyframes[idx].object_mask > 0
            #all_preprocessed[idx][bg_mask] = 0

            otsu_t, bin_res = cv2.threshold(all_preprocessed[idx].astype(np.uint8), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            
            bin_kf.binary_image = np.zeros((bin_res.shape[0], bin_res.shape[1], 3), dtype=np.uint8)
            bin_kf.binary_image[:, :, 0] = 255 - bin_res.copy()
            bin_kf.binary_image[:, :, 1] = bin_kf.binary_image[:, :, 0].copy()
            bin_kf.binary_image[:, :, 2] = bin_kf.binary_image[:, :, 0].copy()
        else:
            # set classifier for binarization ....
            if run_crossvalidation:
                # use the classifier that has not seen this image ...
                ml_binarizer.classifier = cross_validated_classifiers[lecture_offset]
            else:
                # use the globally trained classifier
                ml_binarizer.classifier = classifier

            # ... binarize the pre-processed image ... 
            binary_image = ml_binarizer.preprocessed_binarize(all_preprocessed[idx])
            
            # Do hysteresis filtering ...
            otsu_t, high_bin = cv2.threshold(all_preprocessed[idx].astype(np.uint8), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            low_bin = binary_image
            
            filtered_bin = 255 - MLBinarizer.binary_hysteresis(low_bin, high_bin)
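            # hysteresis (in the usual sense) keeps low-threshold components that touch
            # high-confidence OTSU pixels; the 255 - ... inversion makes ink the foreground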
            
            bin_kf.binary_image = np.zeros((filtered_bin.shape[0], filtered_bin.shape[1], 3), dtype=np.uint8)
            bin_kf.binary_image[:, :, 0] = filtered_bin
            bin_kf.binary_image[:, :, 1] = filtered_bin
            bin_kf.binary_image[:, :, 2] = filtered_bin

        bin_kf.update_binary_cc(False)

        if config.get("ML_BINARIZER_SAVE_BINARY", True):
            if OTSU_mode:
                out_name = "TEMPO_OTSU_baseline_binarized_" + str(idx) + ".png"
            else:
                out_name = "TEMPO_rf_baseline_binarized_" + str(idx) + ".png"

            cv2.imwrite(out_name, bin_kf.binary_image)
        
    end_binarizing = time.time()
    total_binarization_time += end_binarizing - start_binarizing

    # run evaluation metrics ...
    print("Computing final evaluation metrics....")
    
    # Summary level metrics ....
    start_evaluation = time.time()
    
    EvalParameters.UniqueCC_global_tran_window = 1
    EvalParameters.UniqueCC_min_precision = [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.90, 0.95]
    EvalParameters.UniqueCC_min_recall = [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.90, 0.95]
    EvalParameters.Report_Summary_Show_Counts = False
    EvalParameters.Report_Summary_Show_AVG_per_frame = False
    EvalParameters.Report_Summary_Show_Globals = True

    all_scope_metrics, scopes = Evaluator.compute_summary_metrics(fake_segments, all_keyframes, fake_unique_groups,
                                                                  fake_cc_group, fake_segments, binarized_keyframes,
                                                                  False)

    for scope in scopes:
        print("")
        print("Metrics for scope: " + scope)
        print("      \t      \tRecall\t      \t       \tPrecision")
        print("Min R.\tMin P.\tE + P\tE. Only\tP. Only\tE + P\tE. Only\tP. Only\tBG. %\tNo BG P.")
        scope_metrics = all_scope_metrics[scope]

        recall_percent_row = "{0:.2f}\t{1:.2f}\t{2:.2f}\t{3:.2f}\t{4:.2f}"
        prec_percent_row = "{0:.2f}\t{1:.2f}\t{2:.2f}\t{3:.2f}\t{4:.2f}"

        for all_metrics in scope_metrics:
            metrics = all_metrics["recall_metrics"]

            recall_str = recall_percent_row.format(all_metrics["min_cc_recall"] * 100.0,
                                                   all_metrics["min_cc_precision"] * 100.0,
                                                   metrics["recall"] * 100.0, metrics["only_exact_recall"] * 100.0,
                                                   metrics["only_partial_recall"] * 100.0)

            metrics = all_metrics["precision_metrics"]

            prec_str = prec_percent_row.format(metrics["precision"] * 100.0, metrics["only_exact_precision"] * 100.0,
                                               metrics["only_partial_precision"] * 100.0,
                                               metrics["global_bg_unmatched"] * 100.0,
                                               metrics["no_bg_precision"] * 100.0)

            print(recall_str + "\t" + prec_str)

    # pixel level metrics
    pixel_metrics = Evaluator.compute_pixel_binary_metrics(all_keyframes, binarized_keyframes)
    print("Pixel level metrics")
    for key in sorted(pixel_metrics.keys()):
        print("{0:s}\t{1:.2f}".format(key, pixel_metrics[key] *100.0))
    
    end_evaluation = time.time()
    total_evaluation_time += end_evaluation - start_evaluation
    end_everything = time.time()

    print("Total loading time: " + TimeHelper.secondsToStr(end_loading - start_loading))
    print("Total preprocessing time: " + TimeHelper.secondsToStr(end_preprocessing - start_preprocessing))
    print("Total patch extraction time: " + TimeHelper.secondsToStr(end_patch_extraction - start_patch_extraction))
    print("Total training time: " + TimeHelper.secondsToStr(total_training_time))
    print("Total binarization time: " + TimeHelper.secondsToStr(total_binarization_time))
    print("Total evaluation time: " + TimeHelper.secondsToStr(total_evaluation_time))
    print("Total Time: " + TimeHelper.secondsToStr(end_everything - start_loading))

def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # inputs / output paths
    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_KEYFRAMES_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str(
        "SPEAKER_FG_ESTIMATION_MASK_DIR")

    summaries_dir = output_dir + "/" + database.output_summaries
    os.makedirs(summaries_dir, exist_ok=True)
    summary_prefix = summaries_dir + "/" + config.get_str(
        "SPEAKER_SUMMARY_PREFIX") + "_" + database.name + "_"

    # current dataset ....
    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    print("... preparing summary generator ...")
    summ_generator = SummaryGenerator(config)

    for current_lecture in testing_set:
        print("")
        print("Processing: " + current_lecture.title)

        # get all inputs ....

        # read segment data ....
        segments_data_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_segment_data = MiscHelper.dump_load(segments_data_filename)

        # read key-frames data ...
        keyframes_data_filename = keyframes_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_keyframes_data = MiscHelper.dump_load(keyframes_data_filename)

        # read mask data ...
        fg_mask_filename = fg_mask_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        fg_mask_png = MiscHelper.dump_load(fg_mask_filename)
        fg_mask = cv2.imdecode(fg_mask_png, cv2.IMREAD_GRAYSCALE)

        output_prefix = summary_prefix + current_lecture.title.lower()

        summ_generator.export_summary(database, current_lecture,
                                      video_segment_data, video_keyframes_data,
                                      fg_mask, output_prefix)

# Example 15

def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    unique_label = config.get("SPEAKER_VALID_ACTIONS")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")

    training_set = database.datasets[dataset_name]

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    normalization_bone = config.get("SPEAKER_NORMALIZATION_BONE")  # pair of norm factor points

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")
    action_object_name = config.get_str("SPEAKER_ACTION_MAIN_OBJECT", "speaker")
    action_segment_output_dir = config.get_str("SPEAKER_ACTION_SEGMENT_OUTPUT_DIR", ".")
    segments_output_prefix = output_dir + "/" + action_segment_output_dir + "/" + database.name + "_"

    # the per lecture openpose CSV
    lecture_filename_prefix = output_dir + "/" + config.get_str("OPENPOSE_OUTPUT_DIR_CSV") + "/" + database.name + "_"

    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    os.makedirs(output_segment_dir, exist_ok=True)

    # First .... cache all OpenPose CSV data per training lecture ....
    data_per_lecture = {}
    for lecture in training_set:
        lecture_filename = lecture_filename_prefix + lecture.title + ".csv"
        print("Loading data for: " + lecture_filename)

        segments, data = LecturePoseSegments.InitializeFromLectureFile(lecture_filename, normalization_bone,
                                                                       remove_confidence)

        data_per_lecture[lecture.title.lower()] = {
            "segments": segments,
            "data": data
        }

    # read the training frame segments info file
    segment_filename = segments_output_prefix + dataset_name + "_" + action_object_name + ".csv"
    speaker_seg_train = pd.read_csv(segment_filename)  # frame segment info of training data of object speaker
    speaker_seg_train = speaker_seg_train.values

    # Split the OpenPose Data based on the given segments ...
    for vid_name, f_start, f_end, label in speaker_seg_train:
        vid_name = vid_name.lower()
        # print((vid_name, f_start, f_end, label))

        # if label is not in the main 8 labels, omit it
        if label not in unique_label:
            continue

        if vid_name not in data_per_lecture:
            print("Invalid lecture name found: " + vid_name)
            continue

        temp_data = data_per_lecture[vid_name]["data"][f_start:f_end + 1, :]

        temp_pose_segment_data = PoseSegmentData(f_start, f_end, label, temp_data)
        data_per_lecture[vid_name]["segments"].segments.append(temp_pose_segment_data)

    # save to file ...
    for lecture in training_set:
        output_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        MiscHelper.dump_save(data_per_lecture[lecture.title.lower()]["segments"], output_filename)

    print("Data Segment Saving Done.")
    return

# Example 16

def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    video_metadata_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    os.makedirs(temporal_segments_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    for current_lecture in testing_set:
        info_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + current_lecture.title + ".csv"

        video_info = MiscHelper.dump_load(info_filename)

        segmenter = VideoSegmenter.FromConfig(config, video_info["width"],
                                              video_info["height"])

        # read label data ....
        prob_info = ResultReader.read_actions_probabilities_file(
            proba_filename, valid_actions)
        segments, gt_actions, pred_actions, prob_actions = prob_info

        # read bbox data ...
        bbox_filename = output_bboxes_dir + "/" + database.name + "_" + current_lecture.title + ".csv"
        frame_idxs, frame_actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(
            bbox_filename, use_ground_truth)

        # (splits_frames, video_keyframes)
        video_data = segmenter.get_keyframes(pred_actions, segments,
                                             frame_idxs, body_bboxes,
                                             rh_bboxes)

        print("")
        print("video key_frames")
        print(video_data[0])
        print(video_data[1])
        print("")

        output_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(video_data, output_filename)