def start_input_processing(self, process_function):
        for lecture in self.database.lectures:
            self.current_lecture = lecture
            m_videos, lecture_file, skip = self.get_lecture_params(lecture)

            if skip:
                continue

            # read temporal file
            if self.input_temp_prefix is None:
                # null-input process (convenient way to process lectures)
                input_data = None
            else:
                if not isinstance(self.input_temp_prefix, list):
                    input_data = MiscHelper.dump_load(self.temp_dir + '/' +
                                                      self.input_temp_prefix +
                                                      lecture_file)
                else:
                    input_data = []
                    for temp_prefix in self.input_temp_prefix:
                        input_data.append(
                            MiscHelper.dump_load(self.temp_dir + '/' +
                                                 temp_prefix + lecture_file))

            # execute the actual process ....
            timer = TimeHelper()
            timer.startTimer()
            results = process_function(self, input_data)
            timer.endTimer()

            print("Process Finished in: " + timer.totalElapsedStamp())

            # save results
            if self.output_temp_prefix is not None:
                if not isinstance(self.output_temp_prefix, list):
                    MiscHelper.dump_save(
                        results, self.temp_dir + '/' +
                        self.output_temp_prefix + lecture_file)
                else:
                    for out_idx, temp_prefix in enumerate(
                            self.output_temp_prefix):
                        MiscHelper.dump_save(
                            results[out_idx],
                            self.temp_dir + '/' + temp_prefix + lecture_file)
    def FromUniformSample(database, lecture, step, sample_name, binary_source):
        # load output from pipeline ...
        lecture_suffix = str(lecture.id) + ".dat"

        # load binary images
        tempo_binary_filename = database.output_temporal + "/" + binary_source + lecture_suffix
        binary_data = MiscHelper.dump_load(tempo_binary_filename)
        original_frame_times, frame_indices, frame_compressed = binary_data

        # take a sample
        frame_times = [time for time in original_frame_times[::step]]
        frame_indices = [idx for idx in frame_indices[::step]]
        frame_compressed = [frame for frame in frame_compressed[::step]]

        print("Expanding loaded frames .... ")
        binary_frames = Helper.decompress_binary_images(frame_compressed)

        # segments ....
        output_prefix = database.output_summaries + "/" + sample_name + "_" + database.name + "_" + lecture.title.lower(
        )
        print("Saving data to: " + output_prefix)

        # in abs frame indices ...
        intervals = []
        abs_intervals = []
        for idx, comp_frame in enumerate(binary_frames):
            if idx == 0:
                curr_start = int(frame_indices[idx] / 2)
                abs_start = frame_times[idx] / 2.0
            else:
                curr_start = int(
                    (frame_indices[idx - 1] + frame_indices[idx]) / 2)
                abs_start = (frame_times[idx - 1] + frame_times[idx]) / 2.0

            if idx + 1 < len(frame_indices):
                curr_end = int(
                    (frame_indices[idx + 1] + frame_indices[idx]) / 2)
                abs_end = (frame_times[idx + 1] + frame_times[idx]) / 2.0
            else:
                curr_end = frame_indices[idx]
                abs_end = frame_times[idx]

            # invert binarization ...
            binary_frames[idx] = 255 - comp_frame

            intervals.append((curr_start, curr_end))
            abs_intervals.append((abs_start, abs_end))

        KeyframeExporter.Export(output_prefix, database, lecture, intervals,
                                abs_intervals, frame_indices, frame_times,
                                binary_frames)
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    classifier_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_DIR")
    os.makedirs(classifier_dir, exist_ok=True)
    classifier_filename = classifier_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_FILENAME")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]
    training_titles = [lecture.title.lower() for lecture in training_set]

    # get classifier parameters
    rf_n_trees = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_TREES", 64)
    rf_depth = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_DEPTH", 16)

    # read all training data available ....
    train_dataset = {}
    for lecture in training_set:
        input_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        train_dataset[lecture.title.lower()] = MiscHelper.dump_load(
            input_filename)

    train_x, train_y, train_frame_infos = PoseFeatureExtractor.combine_datasets(
        training_titles, train_dataset)

    # classify and confusion matrix part
    clf = RandomForestClassifier(n_estimators=rf_n_trees,
                                 max_depth=rf_depth,
                                 random_state=0)
    clf = clf.fit(train_x, train_y)

    MiscHelper.dump_save(clf, classifier_filename)
Beispiel #4
0
    def prepare(args):
        # load database
        try:
            database = MetaDataDB.from_file(args['database'])
        except:
            print("Invalid AccessMath database file")
            return

        TangentV_Helper.VisualizerServer = database.indexing.visualization_server

        # ... Load 3D structures ....
        print("Loading CC indices per lecture ... ")
        for lecture in database.lectures:
            struct_filename = database.output_temporal + '/' + Parameters.Output_ST3D + str(lecture.id) + ".dat"
            TangentV_Helper.cache_3DSTs[lecture.title] = MiscHelper.dump_load(struct_filename)
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]

    # prepare the feature extractor ...
    feature_points = config.get("SPEAKER_ACTION_FEATURE_POINTS")
    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH", 15)
    feat_extractor = PoseFeatureExtractor(feature_points, segment_length)

    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    os.makedirs(features_dir, exist_ok=True)

    # for each file ... get features ...
    for lecture in training_set:
        input_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        output_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"

        lecture_pose_segments = MiscHelper.dump_load(input_filename)

        vid_data = feat_extractor.get_feature_dataset(lecture_pose_segments)

        MiscHelper.dump_save(vid_data, output_filename)

    return
    def ExportVideo(database,
                    lecture,
                    binary_source,
                    video_prefix,
                    invert_binary,
                    skip_interpolation=0,
                    workers=7,
                    block_size=100):
        # .... paths ...
        lecture_sufix = str(lecture.id) + ".dat"
        tempo_binary_filename = database.output_temporal + "/" + binary_source + lecture_sufix
        lecture_str = video_prefix + "_" + database.name + "_" + lecture.title.lower(
        )
        temporary_prefix = database.output_images + "/" + lecture_str + "_"
        first_video_filename = lecture.main_videos[0]["path"]

        # load binary images
        binary_data = MiscHelper.dump_load(tempo_binary_filename)
        original_frame_times, frame_indices, frame_compressed = binary_data

        print("Generating Temporary Files")
        with ThreadPoolExecutor(max_workers=workers) as executor:
            n_blocks = int(math.ceil(frame_indices[-1] / block_size))

            pref_list = [temporary_prefix] * n_blocks
            frame_list = [frame_compressed] * n_blocks
            idx_list = [frame_indices] * n_blocks
            inv_list = [invert_binary] * n_blocks
            start_list = [idx * block_size for idx in range(n_blocks)]
            block_list = [block_size] * n_blocks
            skip_list = [skip_interpolation] * n_blocks

            total_frames = n_blocks * block_size

            for idx, _ in enumerate(
                    executor.map(KeyframeExporter.ExpandGenerateSaveTemp,
                                 pref_list, frame_list, idx_list, inv_list,
                                 start_list, block_list, skip_list)):
                prc_progress = ((idx + 1) * 100) / n_blocks
                print("-> Exporting: {0:.4f}% (Block {1:d} of {2:d})".format(
                    prc_progress, idx + 1, n_blocks),
                      end="\r",
                      flush=True)

            print("", flush=True)

        # find source sampling frames per second
        capture = cv2.VideoCapture(first_video_filename)
        video_fps = capture.get(cv2.CAP_PROP_FPS)

        source_videos_str = " ".join(
            ["-i " + video["path"] for video in lecture.main_videos])
        audio_filter_complex = " ".join([
            "[{0:d}:a:0]".format(idx + 1)
            for idx in range(len(lecture.main_videos))
        ])
        audio_filter_complex += " concat=n={0:d}:v=0:a=1 [audio]".format(
            len(lecture.main_videos))
        video_output = database.output_videos + "/" + lecture_str + ".mp4"

        input_framerate = video_fps
        output_framerate = video_fps

        video_inputs = "-hwaccel dxva2 -framerate {0:.2f} -start_number 0 -i {1:s}%d.png".format(
            input_framerate, temporary_prefix)
        audio_inputs = "{0:s} -filter_complex \"{1:s}\"".format(
            source_videos_str, audio_filter_complex)
        output_flags = "-pix_fmt yuv420p -vf \"scale=trunc(iw/2)*2:trunc(ih/2)*2\" "
        output_flags += "-s:v 1920x1080 -codec:v mpeg4 -c:v libx264 -r {0:s} -shortest".format(
            str(output_framerate))

        export_command = "ffmpeg -y {0:s} {1:s} -map 0:0 -map \"[audio]\" {2:s} {3:s}"
        export_command = export_command.format(video_inputs, audio_inputs,
                                               output_flags, video_output)

        # generate video using ffmpeg ....
        print("Saving data to: " + video_output)
        print(export_command)
        os.system(export_command)

        # delete temporary images
        print("Deleting Temporary Files")
        for idx in range(total_frames):
            os.remove(temporary_prefix + str(idx) + ".png")
Beispiel #7
0
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("\n\tWhere:")
        print(
            "\tgt_lablels:\t(Optional) Set to 1 to use Ground Truth labels instead of predictions"
        )
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")

    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    os.makedirs(output_bboxes_dir, exist_ok=True)

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    speaker_right_handed = config.get("SPEAKER_IS_RIGHT_HANDED")

    n_joints_body = 25
    n_joints_hand = 21

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    col_name = [
        'frame_id', ('ground_truth' if use_ground_truth else 'pred_label'),
        'body_xmin', 'body_xmax', 'body_ymin', 'body_ymax', 'rh_xmin',
        'rh_xmax', 'rh_ymin', 'rh_ymax'
    ]

    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH")

    # load data + label
    for lecture in testing_set:
        input_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        lec_segments = MiscHelper.dump_load(input_filename)

        if use_ground_truth:
            labels = lec_segments.get_all_labels()
        else:
            input_proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + lecture.title + ".csv"
            _, _, labels, _ = ResultReader.read_actions_probabilities_file(
                input_proba_filename, valid_actions)

        output_filename = output_bboxes_dir + "/" + database.name + "_" + lecture.title + ".csv"
        output_file = ResultRecorder(output_filename)
        output_file.write_headers(col_name)

        # get bbox for skeleton and right hands from all segments
        frames = []
        segment_labels = []
        body_bbox = []
        rh_bbox = []
        for ind in range(0, len(lec_segments.segments)):
            # get the pose data ...
            if not remove_confidence:
                # the data contains confidence ... which needs to be removed at this point ...
                base_pose_data = lec_segments.segments[ind].pose_data

                total_joints = n_joints_body + n_joints_hand * 2
                seg_pose_data = np.zeros(
                    (base_pose_data.shape[0], total_joints * 2),
                    dtype=base_pose_data.dtype)

                seg_pose_data[:, ::2] = base_pose_data[:, ::3]
                seg_pose_data[:, 1::2] = base_pose_data[:, 1::3]
            else:
                # confidence has been removed ....
                seg_pose_data = lec_segments.segments[ind].pose_data

            body_features = seg_pose_data[:, 0:n_joints_body * 2]
            if speaker_right_handed:
                # get right hand data
                rh_features = seg_pose_data[:,
                                            (n_joints_body + n_joints_hand) *
                                            2:]
            else:
                # use left hand data
                rh_features = seg_pose_data[:, n_joints_body *
                                            2:(n_joints_body + n_joints_hand) *
                                            2]

            # get body bboxes and add to the list ....
            temp_body_bbox = PoseSegmentData.get_bbox_frame_data(
                body_features, 2)
            body_bbox += temp_body_bbox.tolist()

            # get hand bboxes and add to the list ....
            temp_rh_bbox = PoseSegmentData.get_bbox_frame_data(rh_features, 2)
            rh_bbox += temp_rh_bbox.tolist()

            # add frame range ....
            f_start = lec_segments.segments[ind].frame_start
            f_end = lec_segments.segments[ind].frame_end
            temp_frames = list(range(f_start, f_end + 1))
            frames += temp_frames

            # add label ....
            temp_label = [[labels[ind]] for _ in range(segment_length)
                          ]  # remove seg_len, you don't need this
            segment_labels += temp_label

        paras = frames, segment_labels, body_bbox, rh_bbox
        output_file.record_results(paras)
Beispiel #8
0
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    video_metadata_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str(
        "SPEAKER_FG_ESTIMATION_MASK_DIR")
    os.makedirs(fg_mask_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    speaker_exp_factor = config.get_float(
        "SPEAKER_FG_ESTIMATION_SPK_EXPANSION_FACTOR")
    min_mask_frames = config.get_int("SPEAKER_FG_ESTIMATION_MIN_MASK_FRAMES")
    mask_exp_radius = config.get_int(
        "SPEAKER_FG_ESTIMATION_MASK_EXPANSION_RADIUS")

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    for current_lecture in testing_set:
        bbox_filename = output_bboxes_dir + "/" + database.name + "_" + current_lecture.title + ".csv"
        frame_idxs, actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(
            bbox_filename, use_ground_truth)

        info_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_info = MiscHelper.dump_load(info_filename)

        fg_estimator = ForegroundEstimator(video_info["width"],
                                           video_info["height"],
                                           speaker_exp_factor, min_mask_frames,
                                           mask_exp_radius)

        fg_mask = fg_estimator.get_mask(frame_idxs, actions, body_bboxes,
                                        rh_bboxes)

        # cv2.imshow(current_lecture.id, fg_mask)
        # cv2.waitKey()

        flag, raw_data = cv2.imencode(".png", fg_mask)

        output_filename = fg_mask_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(raw_data, output_filename)
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")

    action_class_output_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    os.makedirs(action_class_output_dir, exist_ok=True)
    os.makedirs(action_class_probabilities_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    # load the saved model for action classification ...
    classifier_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_DIR")
    classifier_filename = classifier_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_FILENAME")
    clf = MiscHelper.dump_load(classifier_filename)

    csv_col = ['frame_start', 'frame_end', 'prediction']

    for lecture in testing_set:
        input_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        output_actions_filename = action_class_output_dir + "/" + database.name + "_" + lecture.title + ".csv"
        output_proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + lecture.title + ".csv"

        # load data ...
        data_xy = MiscHelper.dump_load(input_filename)

        # classier predict ....
        test_x = data_xy["features"]
        y_pred = clf.predict(test_x)
        y_pred_re = y_pred.reshape((y_pred.shape[0], 1))

        # save prediction result
        output_csv = ResultRecorder(output_actions_filename)
        output_csv.write_headers(csv_col)

        # the function accepts a list of columns to save on CSV ...
        # by transposing, we make the standard list of rows into a list of columns for the function ...
        paras = np.hstack((data_xy["frame_infos"], y_pred[:,
                                                          None])).transpose()
        output_csv.record_results(paras)

        # save label probabilities
        all_classes = clf.classes_
        y_prob = clf.predict_proba(test_x)
        infos = np.concatenate((y_pred_re, y_prob), axis=1)
        output_csv = ResultRecorder(output_proba_filename)
        output_csv.write_headers(csv_col + all_classes.tolist())
        # ... IDEM ....
        paras = np.hstack((data_xy["frame_infos"], infos)).transpose()
        output_csv.record_results(paras)
Beispiel #10
0
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_KEYFRAMES_DIR")
    os.makedirs(keyframes_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    for current_lecture in testing_set:
        # read segment data ....
        input_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_segment_data = MiscHelper.dump_load(input_filename)

        # key-frames that must be extracted from video ...
        segments, keyframes_per_segment = video_segment_data
        all_keyframes = []
        for segment_keyframes in keyframes_per_segment:
            all_keyframes += [
                keyframe_idx for keyframe_idx, bbox in segment_keyframes
            ]

        print("")
        print("processing: " + current_lecture.title)
        # print(all_keyframes)

        # the simple frame sampling worker ..
        worker = SimpleFrameSampler()

        # main video file names
        m_videos = [
            config.get_str("VIDEO_FILES_PATH") + "/" + video["path"]
            for video in current_lecture.main_videos
        ]

        # execute the actual process ....
        processor = SequentialVideoSampler(m_videos, all_keyframes)

        if "forced_width" in current_lecture.parameters:
            processor.force_resolution(
                current_lecture.parameters["forced_width"],
                current_lecture.parameters["forced_height"])
        processor.doProcessing(worker, 0, True)  # 0

        sampled_frame_data = worker.frame_times, worker.frame_indices, worker.compressed_frames

        # save results
        keyframes_data_filename = keyframes_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(sampled_frame_data, keyframes_data_filename)
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    # inputs / output paths
    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_KEYFRAMES_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str(
        "SPEAKER_FG_ESTIMATION_MASK_DIR")

    summaries_dir = output_dir + "/" + database.output_summaries
    os.makedirs(summaries_dir, exist_ok=True)
    summary_prefix = summaries_dir + "/" + config.get_str(
        "SPEAKER_SUMMARY_PREFIX") + "_" + database.name + "_"

    # current dataset ....
    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    print("... preparing summary generator ...")
    summ_generator = SummaryGenerator(config)

    for current_lecture in testing_set:
        print("")
        print("Processing: " + current_lecture.title)

        # get all inputs ....

        # read segment data ....
        segments_data_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_segment_data = MiscHelper.dump_load(segments_data_filename)

        # read key-frames data ...
        keyframes_data_filename = keyframes_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_keyframes_data = MiscHelper.dump_load(keyframes_data_filename)

        # read mask data ...
        fg_mask_filename = fg_mask_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        fg_mask_png = MiscHelper.dump_load(fg_mask_filename)
        fg_mask = cv2.imdecode(fg_mask_png, cv2.IMREAD_GRAYSCALE)

        output_prefix = summary_prefix + current_lecture.title.lower()

        summ_generator.export_summary(database, current_lecture,
                                      video_segment_data, video_keyframes_data,
                                      fg_mask, output_prefix)
Beispiel #12
0
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    video_metadata_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    os.makedirs(temporal_segments_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    for current_lecture in testing_set:
        info_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + current_lecture.title + ".csv"

        video_info = MiscHelper.dump_load(info_filename)

        segmenter = VideoSegmenter.FromConfig(config, video_info["width"],
                                              video_info["height"])

        # read label data ....
        prob_info = ResultReader.read_actions_probabilities_file(
            proba_filename, valid_actions)
        segments, gt_actions, pred_actions, prob_actions = prob_info

        # read bbox data ...
        bbox_filename = output_bboxes_dir + "/" + database.name + "_" + current_lecture.title + ".csv"
        frame_idxs, frame_actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(
            bbox_filename, use_ground_truth)

        # (splits_frames, video_keyframes)
        video_data = segmenter.get_keyframes(pred_actions, segments,
                                             frame_idxs, body_bboxes,
                                             rh_bboxes)

        print("")
        print("video key_frames")
        print(video_data[0])
        print(video_data[1])
        print("")

        output_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(video_data, output_filename)