def start_image_list_preprocessing(self,
                                       src_dir,
                                       get_worker_function,
                                       get_results_function,
                                       img_extension='.png',
                                       frames_limit=0,
                                       verbose=False):
        for lecture in self.database.lectures:
            self.current_lecture = lecture
            _, out_file, skip = self.get_lecture_params(lecture)

            if skip:
                continue

            # create a worker ...
            worker = get_worker_function(self)

            # execute the actual process ....
            img_src_dir = '{}{}'.format(src_dir, self.current_lecture.title)
            processor = ImageListProcessor(img_src_dir,
                                           img_extension=img_extension)
            if verbose:
                print('Opening exported image folder ' + img_src_dir)
            if "forced_width" in lecture.parameters:
                processor.force_resolution(lecture.parameters["forced_width"],
                                           lecture.parameters["forced_height"])
            processor.doProcessing(worker, frames_limit, verbose)

            # save results
            if self.output_temp_prefix is not None:
                results = get_results_function(worker)
                MiscHelper.dump_save(
                    results,
                    self.temp_dir + '/' + self.output_temp_prefix + out_file)

    def start_video_processing(self,
                               frames_per_second,
                               get_worker_function,
                               get_results_function,
                               frames_limit=0,
                               verbose=False,
                               force_no_seek=False):
        for lecture in self.database.lectures:
            self.current_lecture = lecture
            m_videos, out_file, skip = self.get_lecture_params(lecture)

            if skip:
                continue

            # create a worker ...
            worker = get_worker_function(self)

            # execute the actual process ....
            processor = VideoProcessor(m_videos, frames_per_second)
            if "forced_width" in lecture.parameters:
                processor.force_resolution(lecture.parameters["forced_width"],
                                           lecture.parameters["forced_height"])
            processor.doProcessing(worker, frames_limit, verbose,
                                   force_no_seek)

            # save results
            if self.output_temp_prefix is not None:
                results = get_results_function(worker)
                MiscHelper.dump_save(
                    results,
                    self.temp_dir + '/' + self.output_temp_prefix + out_file)
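
    # Usage sketch (hypothetical worker factory and results getter; the names
    # below are assumptions, not part of AccessMath):
    #
    #   def get_worker(process):
    #       # build whatever per-lecture worker this pipeline step needs
    #       return FrameDifferenceWorker(process.current_lecture)
    #
    #   def get_results(worker):
    #       # return whatever the worker accumulated during doProcessing
    #       return worker.results
    #
    #   process.start_video_processing(10.0, get_worker, get_results,
    #                                  verbose=True)
    #
    # The two callbacks keep these drivers generic: each pipeline step only
    # decides how to build its worker and what to persist afterwards.
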
def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [dataset]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tdataset:\tDataset to run (Default= Training)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    video_metadata_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    os.makedirs(video_metadata_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    for current_lecture in testing_set:
        print("")
        print("processing: " + current_lecture.title)
        # print(all_keyframes)

        # the simple frame sampling worker ..
        worker = SimpleFrameSampler()

        # main video file names
        m_videos = [
            config.get_str("VIDEO_FILES_PATH") + "/" + video["path"]
            for video in current_lecture.main_videos
        ]

        video_info = {}
        if "forced_width" in current_lecture.parameters:
            video_info["width"] = current_lecture.parameters["forced_width"]
            video_info["height"] = current_lecture.parameters["forced_height"]
        else:
            # execute the actual process ....
            processor = SequentialVideoSampler(m_videos, [0])
            processor.doProcessing(worker, 0, True)

            video_info["width"] = worker.width
            video_info["height"] = worker.height

        output_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(video_info, output_filename)

# Example #4

def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    normalization_bone = config.get("SPEAKER_NORMALIZATION_BONE")  # pair of norm factor points

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")

    # the per lecture openpose CSV
    lecture_filename_prefix = output_dir + "/" + config.get_str("OPENPOSE_OUTPUT_DIR_CSV") + "/" + database.name + "_"

    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    os.makedirs(output_segment_dir, exist_ok=True)

    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH")

    for lecture in testing_set:
        lecture_filename = lecture_filename_prefix + lecture.title + ".csv"
        print("Loading data for: " + lecture_filename)

        # get the corresponding data for this lecture ...
        lec_segments, lecture_data = LecturePoseSegments.InitializeFromLectureFile(lecture_filename, normalization_bone,
                                                                                   remove_confidence)

        # sequential sampling for pose segments
        vid_len = lecture_data.shape[0]
        for ind in range(0, int(vid_len / segment_length)):
            f_start = ind * segment_length
            f_end = f_start + segment_length - 1
            temp_data = lecture_data[f_start:f_end + 1, :]

            lec_segments.segments.append(PoseSegmentData(f_start, f_end, None, temp_data))

        # save ....
        output_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        MiscHelper.dump_save(lec_segments, output_filename)

def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    classifier_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_DIR")
    os.makedirs(classifier_dir, exist_ok=True)
    classifier_filename = classifier_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_FILENAME")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]
    training_titles = [lecture.title.lower() for lecture in training_set]

    # get classifier parameters
    rf_n_trees = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_TREES", 64)
    rf_depth = config.get_int("SPEAKER_ACTION_CLASSIFIER_RF_DEPTH", 16)

    # read all training data available ....
    train_dataset = {}
    for lecture in training_set:
        input_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        train_dataset[lecture.title.lower()] = MiscHelper.dump_load(
            input_filename)

    train_x, train_y, train_frame_infos = PoseFeatureExtractor.combine_datasets(
        training_titles, train_dataset)

    # classify and confusion matrix part
    clf = RandomForestClassifier(n_estimators=rf_n_trees,
                                 max_depth=rf_depth,
                                 random_state=0)
    clf = clf.fit(train_x, train_y)

    MiscHelper.dump_save(clf, classifier_filename)

# Example #6

    def getCrossings(self, cc, horizontal):
        if horizontal:
            step = cc.normalized.shape[1] / float(self.count_crossings + 1)
        else:
            step = cc.normalized.shape[0] / float(self.count_crossings + 1)

        counts = []
        mins = []
        maxs = []
        for i in range(self.count_crossings):
            pos = int((i + 1) * step)

            # the crossing is treated in terms of boolean intervals...
            booleans = []
            if horizontal:
                #horizontal -> y fixed and x moves
                for x in range(cc.normalized.shape[1]):
                    booleans.append(cc.normalized[pos, x] > 128.0)

            else:
                #vertical -> x fixed and y moves
                for y in range(cc.normalized.shape[0]):
                    booleans.append(cc.normalized[y, pos] > 128.0)

            # find the intervals...
            intervals = MiscHelper.findBooleanIntervals(booleans, True)

            # now, get the middle points for each interval...
            midPoints = MiscHelper.intervalMidPoints(intervals)

            # normalize midpoint positions to [-1, 1] along the scanned axis
            limit = cc.normalized.shape[1 if horizontal else 0] - 1
            midPoints = MiscHelper.scaleValues(midPoints, 0, limit, -1, 1)

            counts.append(len(intervals))

            if len(intervals) > 0:
                mins.append(midPoints[0])
                maxs.append(midPoints[-1])
            else:
                mins.append(1.1)
                maxs.append(-1.1)

        counts = MiscHelper.scaleValues(counts, 0, 10, -3, 3)

        return counts + mins + maxs
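
    # The returned vector concatenates, for each of the count_crossings scan
    # lines: the scaled interval count, then the first midpoint, then the last
    # midpoint (3 * count_crossings values). The 1.1 / -1.1 sentinels lie
    # outside the normalized [-1, 1] range and mark scan lines that crossed
    # no foreground pixels at all.
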
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")
    training_set = database.datasets[dataset_name]

    # prepare the feature extractor ...
    feature_points = config.get("SPEAKER_ACTION_FEATURE_POINTS")
    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH", 15)
    feat_extractor = PoseFeatureExtractor(feature_points, segment_length)

    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")
    os.makedirs(features_dir, exist_ok=True)

    # for each file ... get features ...
    for lecture in training_set:
        input_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        output_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"

        lecture_pose_segments = MiscHelper.dump_load(input_filename)

        vid_data = feat_extractor.get_feature_dataset(lecture_pose_segments)

        MiscHelper.dump_save(vid_data, output_filename)

    return

    @staticmethod
    def FromUniformSample(database, lecture, step, sample_name, binary_source):
        # load output from pipeline ...
        lecture_suffix = str(lecture.id) + ".dat"

        # load binary images
        tempo_binary_filename = database.output_temporal + "/" + binary_source + lecture_suffix
        binary_data = MiscHelper.dump_load(tempo_binary_filename)
        original_frame_times, frame_indices, frame_compressed = binary_data

        # take a sample
        frame_times = list(original_frame_times[::step])
        frame_indices = list(frame_indices[::step])
        frame_compressed = list(frame_compressed[::step])

        print("Expanding loaded frames .... ")
        binary_frames = Helper.decompress_binary_images(frame_compressed)

        # segments ....
        output_prefix = (database.output_summaries + "/" + sample_name + "_" +
                         database.name + "_" + lecture.title.lower())
        print("Saving data to: " + output_prefix)

        # in abs frame indices ...
        intervals = []
        abs_intervals = []
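        # each sampled frame covers the interval from the midpoint with the
        # previous sample to the midpoint with the next one, both in frame
        # indices and in absolute times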
        for idx, comp_frame in enumerate(binary_frames):
            if idx == 0:
                curr_start = int(frame_indices[idx] / 2)
                abs_start = frame_times[idx] / 2.0
            else:
                curr_start = int(
                    (frame_indices[idx - 1] + frame_indices[idx]) / 2)
                abs_start = (frame_times[idx - 1] + frame_times[idx]) / 2.0

            if idx + 1 < len(frame_indices):
                curr_end = int(
                    (frame_indices[idx + 1] + frame_indices[idx]) / 2)
                abs_end = (frame_times[idx + 1] + frame_times[idx]) / 2.0
            else:
                curr_end = frame_indices[idx]
                abs_end = frame_times[idx]

            # invert binarization ...
            binary_frames[idx] = 255 - comp_frame

            intervals.append((curr_start, curr_end))
            abs_intervals.append((abs_start, abs_end))

        KeyframeExporter.Export(output_prefix, database, lecture, intervals,
                                abs_intervals, frame_indices, frame_times,
                                binary_frames)

# Example #9

    @staticmethod
    def prepare(args):
        # load database
        try:
            database = MetaDataDB.from_file(args['database'])
        except Exception:
            print("Invalid AccessMath database file")
            return

        TangentV_Helper.VisualizerServer = database.indexing.visualization_server

        # ... Load 3D structures ....
        print("Loading CC indices per lecture ... ")
        for lecture in database.lectures:
            struct_filename = database.output_temporal + '/' + Parameters.Output_ST3D + str(lecture.id) + ".dat"
            TangentV_Helper.cache_3DSTs[lecture.title] = MiscHelper.dump_load(struct_filename)

    def initialize(self):
        # load database info
        try:
            self.database = MetaDataDB.from_file(self.database_file)
        except Exception:
            print("Invalid database file")
            return False

        self.params = MiscHelper.optional_parameters(self.raw_params, 0)

        # process the specified dataset(s)
        if "d" in self.params:
            if not isinstance(self.params["d"], list):
                self.params["d"] = [self.params["d"]]

            valid_datasets = []
            for name in self.params["d"]:
                dataset = self.database.get_dataset(name)

                if dataset is None:
                    print("Invalid Dataset name <" + name + ">")
                    return False
                else:
                    valid_datasets.append(dataset)

            self.params["d"] = valid_datasets

        # process only the specified lectures
        if "l" in self.params:
            if not isinstance(self.params["l"], list):
                self.params["l"] = [self.params["l"]]

            self.params["l"] = [name.lower() for name in self.params["l"]]

        # override the input prefix
        if "i" in self.params:
            self.input_temp_prefix = self.params["i"]

        # override the output prefix
        if "o" in self.params:
            self.input_temp_prefix = self.params["o"]

        self.temp_dir = self.database.output_temporal
        self.out_dir = self.database.output_preprocessed
        self.img_dir = self.database.output_images

        # success loading database file ..
        return True
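
    # Parameter sketch (hypothetical invocation; the flag values below are
    # assumptions about the optional-parameter format):
    #   raw_params like ["-d", "training", "-l", "lecture_01", "-o", "calc_"]
    # would restrict processing to dataset "training" and lecture
    # "lecture_01", and override the temporal output prefix with "calc_".
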
    def selectMotionlessFrames(self):
        # maximum motion allowed
        threshold = 0

        booleans = [m.count_changes <= threshold for m in self.motion_detected]

        intervals = MiscHelper.findBooleanIntervals(booleans, True)

        # only consider intervals of at least 3 frames
        candidates = []
        for ini, end in intervals:
            if end - ini >= 2:
                # pick the frame in the middle of the interval
                middle = self.motion_detected[int((end + ini) / 2.0)]
                candidates.append((middle.video_index, middle.time))

        return candidates

    def getNonblockedIntervals(self, region_box, max_width, max_height,
                               init_index, end_time):
        # Find a frame where the found region has no motion around it

        pos = init_index
        blocked_list = []

        while pos < len(self.motion_detected) and \
              self.motion_detected[pos].absolute_time < end_time:

            # check motion[pos] to see if main_region is blocked
            blocked = self.motion_detected[pos].isBlockingRegion(
                region_box, max_width, max_height, 3.0, 3.0)

            # add to boolean list
            blocked_list.append(blocked)

            pos += 1

        # now find the intervals where it is not obstructed ...
        intervals = MiscHelper.findBooleanIntervals(blocked_list, False)

        return intervals

    def start_input_processing(self, process_function):
        for lecture in self.database.lectures:
            self.current_lecture = lecture
            m_videos, lecture_file, skip = self.get_lecture_params(lecture)

            if skip:
                continue

            # read temporal file
            if self.input_temp_prefix is None:
                # null-input process (convenient way to process lectures)
                input_data = None
            else:
                if not isinstance(self.input_temp_prefix, list):
                    input_data = MiscHelper.dump_load(self.temp_dir + '/' +
                                                      self.input_temp_prefix +
                                                      lecture_file)
                else:
                    input_data = []
                    for temp_prefix in self.input_temp_prefix:
                        input_data.append(
                            MiscHelper.dump_load(self.temp_dir + '/' +
                                                 temp_prefix + lecture_file))

            # execute the actual process ....
            timer = TimeHelper()
            timer.startTimer()
            results = process_function(self, input_data)
            timer.endTimer()

            print("Process Finished in: " + timer.totalElapsedStamp())

            # save results
            if self.output_temp_prefix is not None:
                if not isinstance(self.output_temp_prefix, list):
                    MiscHelper.dump_save(
                        results, self.temp_dir + '/' +
                        self.output_temp_prefix + lecture_file)
                else:
                    for out_idx, temp_prefix in enumerate(
                            self.output_temp_prefix):
                        MiscHelper.dump_save(
                            results[out_idx],
                            self.temp_dir + '/' + temp_prefix + lecture_file)
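
    # Usage sketch (hypothetical process function; the names below are
    # assumptions):
    #
    #   def count_frames(process, input_data):
    #       # input_data is None, a single loaded object, or a list of them,
    #       # depending on how input_temp_prefix was configured
    #       return do_something(input_data)
    #
    #   process.start_input_processing(count_frames)
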

# Example #14

def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("\n\tWhere:")
        print("\tgt_labels:\t(Optional) Set to 1 to use Ground Truth labels "
              "instead of predictions")
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")
    output_segment_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")

    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    os.makedirs(output_bboxes_dir, exist_ok=True)

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    speaker_right_handed = config.get("SPEAKER_IS_RIGHT_HANDED")

    n_joints_body = 25
    n_joints_hand = 21

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    col_name = [
        'frame_id', ('ground_truth' if use_ground_truth else 'pred_label'),
        'body_xmin', 'body_xmax', 'body_ymin', 'body_ymax', 'rh_xmin',
        'rh_xmax', 'rh_ymin', 'rh_ymax'
    ]

    segment_length = config.get_int("SPEAKER_ACTION_SEGMENT_LENGTH")

    # load data + label
    for lecture in testing_set:
        input_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        lec_segments = MiscHelper.dump_load(input_filename)

        if use_ground_truth:
            labels = lec_segments.get_all_labels()
        else:
            input_proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + lecture.title + ".csv"
            _, _, labels, _ = ResultReader.read_actions_probabilities_file(
                input_proba_filename, valid_actions)

        output_filename = output_bboxes_dir + "/" + database.name + "_" + lecture.title + ".csv"
        output_file = ResultRecorder(output_filename)
        output_file.write_headers(col_name)

        # get bbox for skeleton and right hands from all segments
        frames = []
        segment_labels = []
        body_bbox = []
        rh_bbox = []
        for ind in range(0, len(lec_segments.segments)):
            # get the pose data ...
            if not remove_confidence:
                # the data contains confidence ... which needs to be removed at this point ...
                base_pose_data = lec_segments.segments[ind].pose_data

                total_joints = n_joints_body + n_joints_hand * 2
                seg_pose_data = np.zeros(
                    (base_pose_data.shape[0], total_joints * 2),
                    dtype=base_pose_data.dtype)

                seg_pose_data[:, ::2] = base_pose_data[:, ::3]
                seg_pose_data[:, 1::2] = base_pose_data[:, 1::3]
            else:
                # confidence has been removed ....
                seg_pose_data = lec_segments.segments[ind].pose_data

            body_features = seg_pose_data[:, 0:n_joints_body * 2]
            if speaker_right_handed:
                # get right hand data
                rh_features = seg_pose_data[:, (n_joints_body + n_joints_hand) * 2:]
            else:
                # use left hand data
                rh_features = seg_pose_data[:, n_joints_body * 2:
                                               (n_joints_body + n_joints_hand) * 2]

            # get body bboxes and add to the list ....
            temp_body_bbox = PoseSegmentData.get_bbox_frame_data(
                body_features, 2)
            body_bbox += temp_body_bbox.tolist()

            # get hand bboxes and add to the list ....
            temp_rh_bbox = PoseSegmentData.get_bbox_frame_data(rh_features, 2)
            rh_bbox += temp_rh_bbox.tolist()

            # add frame range ....
            f_start = lec_segments.segments[ind].frame_start
            f_end = lec_segments.segments[ind].frame_end
            temp_frames = list(range(f_start, f_end + 1))
            frames += temp_frames

            # add label ....
            # replicate the segment label once per frame
            # (each segment spans exactly segment_length frames)
            temp_label = [[labels[ind]] for _ in range(segment_length)]
            segment_labels += temp_label

        paras = frames, segment_labels, body_bbox, rh_bbox
        output_file.record_results(paras)

def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    # inputs / output paths
    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_KEYFRAMES_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str(
        "SPEAKER_FG_ESTIMATION_MASK_DIR")

    summaries_dir = output_dir + "/" + database.output_summaries
    os.makedirs(summaries_dir, exist_ok=True)
    summary_prefix = summaries_dir + "/" + config.get_str(
        "SPEAKER_SUMMARY_PREFIX") + "_" + database.name + "_"

    # current dataset ....
    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    print("... preparing summary generator ...")
    summ_generator = SummaryGenerator(config)

    for current_lecture in testing_set:
        print("")
        print("Processing: " + current_lecture.title)

        # get all inputs ....

        # read segment data ....
        segments_data_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_segment_data = MiscHelper.dump_load(segments_data_filename)

        # read key-frames data ...
        keyframes_data_filename = keyframes_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_keyframes_data = MiscHelper.dump_load(keyframes_data_filename)

        # read mask data ...
        fg_mask_filename = fg_mask_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        fg_mask_png = MiscHelper.dump_load(fg_mask_filename)
        fg_mask = cv2.imdecode(fg_mask_png, cv2.IMREAD_GRAYSCALE)

        output_prefix = summary_prefix + current_lecture.title.lower()

        summ_generator.export_summary(database, current_lecture,
                                      video_segment_data, video_keyframes_data,
                                      fg_mask, output_prefix)

# Example #16

def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    video_metadata_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    fg_mask_dir = output_dir + "/" + config.get_str(
        "SPEAKER_FG_ESTIMATION_MASK_DIR")
    os.makedirs(fg_mask_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    speaker_exp_factor = config.get_float(
        "SPEAKER_FG_ESTIMATION_SPK_EXPANSION_FACTOR")
    min_mask_frames = config.get_int("SPEAKER_FG_ESTIMATION_MIN_MASK_FRAMES")
    mask_exp_radius = config.get_int(
        "SPEAKER_FG_ESTIMATION_MASK_EXPANSION_RADIUS")

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    for current_lecture in testing_set:
        bbox_filename = output_bboxes_dir + "/" + database.name + "_" + current_lecture.title + ".csv"
        frame_idxs, actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(
            bbox_filename, use_ground_truth)

        info_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_info = MiscHelper.dump_load(info_filename)

        fg_estimator = ForegroundEstimator(video_info["width"],
                                           video_info["height"],
                                           speaker_exp_factor, min_mask_frames,
                                           mask_exp_radius)

        fg_mask = fg_estimator.get_mask(frame_idxs, actions, body_bboxes,
                                        rh_bboxes)

        # cv2.imshow(current_lecture.id, fg_mask)
        # cv2.waitKey()

        flag, raw_data = cv2.imencode(".png", fg_mask)

        output_filename = fg_mask_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(raw_data, output_filename)

# Example #17

def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    keyframes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_KEYFRAMES_DIR")
    os.makedirs(keyframes_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    for current_lecture in testing_set:
        # read segment data ....
        input_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        video_segment_data = MiscHelper.dump_load(input_filename)

        # key-frames that must be extracted from video ...
        segments, keyframes_per_segment = video_segment_data
        all_keyframes = []
        for segment_keyframes in keyframes_per_segment:
            all_keyframes += [
                keyframe_idx for keyframe_idx, bbox in segment_keyframes
            ]

        print("")
        print("processing: " + current_lecture.title)
        # print(all_keyframes)

        # the simple frame sampling worker ..
        worker = SimpleFrameSampler()

        # main video file names
        m_videos = [
            config.get_str("VIDEO_FILES_PATH") + "/" + video["path"]
            for video in current_lecture.main_videos
        ]

        # execute the actual process ....
        processor = SequentialVideoSampler(m_videos, all_keyframes)

        if "forced_width" in current_lecture.parameters:
            processor.force_resolution(
                current_lecture.parameters["forced_width"],
                current_lecture.parameters["forced_height"])
        processor.doProcessing(worker, 0, True)

        sampled_frame_data = worker.frame_times, worker.frame_indices, worker.compressed_frames

        # save results
        keyframes_data_filename = keyframes_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(sampled_frame_data, keyframes_data_filename)

# Example #18

def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    unique_label = config.get("SPEAKER_VALID_ACTIONS")

    dataset_name = config.get("SPEAKER_TRAINING_SET_NAME")

    training_set = database.datasets[dataset_name]

    remove_confidence = config.get("SPEAKER_REMOVE_JOINT_CONFIDENCE")
    normalization_bone = config.get("SPEAKER_NORMALIZATION_BONE")  # pair of norm factor points

    # get the paths to the outputs from previous scripts ....
    output_dir = config.get_str("OUTPUT_PATH")
    action_object_name = config.get_str("SPEAKER_ACTION_MAIN_OBJECT", "speaker")
    action_segment_output_dir = config.get_str("SPEAKER_ACTION_SEGMENT_OUTPUT_DIR", ".")
    segments_output_prefix = output_dir + "/" + action_segment_output_dir + "/" + database.name + "_"

    # the per lecture openpose CSV
    lecture_filename_prefix = output_dir + "/" + config.get_str("OPENPOSE_OUTPUT_DIR_CSV") + "/" + database.name + "_"

    output_segment_dir = output_dir + "/" + config.get("SPEAKER_ACTION_SEGMENT_POSE_DATA_OUTPUT_DIR")
    os.makedirs(output_segment_dir, exist_ok=True)

    # First .... cache all OpenPose CSV data per training lecture ....
    data_per_lecture = {}
    for lecture in training_set:
        lecture_filename = lecture_filename_prefix + lecture.title + ".csv"
        print("Loading data for: " + lecture_filename)

        segments, data = LecturePoseSegments.InitializeFromLectureFile(lecture_filename, normalization_bone,
                                                                       remove_confidence)

        data_per_lecture[lecture.title.lower()] = {
            "segments": segments,
            "data": data
        }

    # read the training frame segments info file
    segment_filename = segments_output_prefix + dataset_name + "_" + action_object_name + ".csv"
    speaker_seg_train = pd.read_csv(segment_filename)  # frame segment info of training data of object speaker
    speaker_seg_train = speaker_seg_train.values

    # Split the OpenPose Data based on the given segments ...
    for vid_name, f_start, f_end, label in speaker_seg_train:
        vid_name = vid_name.lower()
        # print((vid_name, f_start, f_end, label))

        # if label is not in the main 8 labels, omit it
        if label not in unique_label:
            continue

        if vid_name not in data_per_lecture:
            print("Invalid lecture name found: " + vid_name)
            continue

        temp_data = data_per_lecture[vid_name]["data"][f_start:f_end + 1, :]

        temp_pose_segment_data = PoseSegmentData(f_start, f_end, label, temp_data)
        data_per_lecture[vid_name]["segments"].segments.append(temp_pose_segment_data)

    # save to file ...
    for lecture in training_set:
        output_filename = output_segment_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        MiscHelper.dump_save(data_per_lecture[lecture.title.lower()]["segments"], output_filename)

    print("Data Segment Saving Done.")
    return

# Example #19

    @staticmethod
    def computeVisualAlignment(m_videos, a_videos, time_offset, motionless,
                               save_frames, extraction_method_id):
        #distribute the selection of motionless frames...
        selected = MiscHelper.distribute_values(Aligner.ALIGNMENT_SAMPLE, 0,
                                                len(motionless) - 1)

        #create the list..
        frame_list = []
        for idx in selected:
            frame_list.append(motionless[idx])

        #extract the motionless frames from main videos
        frames = Loader.extractFramesRelative(m_videos, frame_list)
        if save_frames:
            for idx, f in enumerate(frames):
                abs_time, frame = f
                cv2.imwrite("out/main_" + str(idx) + ".jpg", frame)

        # calculate the absolute time for the corresponding frames on the
        # auxiliary video, considering the time difference between videos
        times = [(abs_time - time_offset) for abs_time, frame in frames]

        # extract the motionless frames from the auxiliary videos
        aux_frames = Loader.extractFramesAbsolute(a_videos, times)
        if save_frames:
            for idx, frame in enumerate(aux_frames):
                cv2.imwrite("out/auxiliar_" + str(idx) + ".jpg", frame)

        #find the visual correspondence between pairs of key frames
        matches_aux = []
        matches_main = []
        aux_boxes = []
        main_boxes = []

        all_content_main = []
        all_content_aux = []
        #...first... extract the content from each pair of frames...
        for i in range(min(Aligner.ALIGNMENT_SAMPLE, len(frames))):

            #get the current key frames
            abs_time, frame_main = frames[i]
            frame_aux = aux_frames[i]

            print("Extracting content #" + str(i + 1) + " ... (Main: " +
                  TimeHelper.stampToStr(abs_time) + " - Aux: " +
                  TimeHelper.stampToStr(times[i]) + ")")

            #from the main key frame, extract content on the board
            main_box, content_main = Binarizer.frameContentBinarization(
                frame_main, extraction_method_id)
            main_boxes.append(main_box)

            #from the auxiliary key frame, extract content on the board
            aux_box, content_aux = Binarizer.frameContentBinarization(
                frame_aux, extraction_method_id)
            aux_boxes.append(aux_box)

            #add to list...
            all_content_main.append(content_main)
            all_content_aux.append(content_aux)

        #...then, extract the alignment.... keep highest score...
        all_scores = []
        for i in range(min(Aligner.ALIGNMENT_SAMPLE, len(frames))):
            print("Testing Alignment #" + str(i + 1) + " ... ")

            #corresponding frames....
            content_aux = all_content_aux[i]
            content_main = all_content_main[i]

            #Extract a set of good matches between these two images....
            # where object = aux content from mimio, to align with main content
            #       scene = main content to which the change regions will be projected
            aux_list, main_list = VisualAlignment.getSURFMatchingPoints(
                content_aux, content_main, Aligner.SURF_THRESHOLD)

            #generate projection based on these points...
            current_projection, mask = VisualAlignment.generateProjection(
                aux_list, main_list)
            #calculate score...
            score = VisualAlignment.getProjectionScore(current_projection,
                                                       all_content_main,
                                                       all_content_aux)

            #print( str(i) + " => " + str(score) )
            all_scores.append((score, i, current_projection))

            #add to the total list of points...
            matches_aux.append(aux_list)
            matches_main.append(main_list)

            #print( "ON " + str(i) + " where found " +  str(len(aux_list) ) + " matches" )

        all_scores = sorted(all_scores, reverse=True)

        #current best projection is the one with the top score...
        max_score = all_scores[0][0]
        all_matches_aux = matches_aux[all_scores[0][1]]
        all_matches_main = matches_main[all_scores[0][1]]
        best_projection = all_scores[0][2]

        #now, try to improve the quality of the projection by adding some keypoints from
        #candidate alignments with high scores and computing a new combined projection
        #for the list of combined keypoint matches...
        new_score = max_score
        pos = 1
        while new_score >= max_score and pos < len(all_scores):
            #add keypoints to the combined list...
            current_aux = all_matches_aux + matches_aux[all_scores[pos][1]]
            current_main = all_matches_main + matches_main[all_scores[pos][1]]

            #generate the new projection...
            current_projection, mask = VisualAlignment.generateProjection(
                current_aux, current_main)

            #get score for combined projection...
            new_score = VisualAlignment.getProjectionScore(
                current_projection, all_content_main, all_content_aux)

            # check if the score improved ...
            if new_score >= max_score:
                # new best projection found ....
                max_score = new_score
                all_matches_aux = current_aux
                all_matches_main = current_main

                best_projection = current_projection
                pos += 1

        #Get the final alignment
        projection = best_projection

        print("Best Alignment Score: " + str(max_score))
        """
        # Un-comment to output alignment images
        for i in range(len(all_content_main)):
            content_main = all_content_main[i]
            content_aux = all_content_aux[i]

            proj_img = cv2.warpPerspective(content_aux, projection,
                                           (content_main.shape[1], content_main.shape[0]))

            result_image = np.zeros( (content_main.shape[0], content_main.shape[1], 3) )
            result_image[:,:,2] = content_main
            result_image[:,:,1] = proj_img

            #cv2.imshow('img',result_image)
            cv2.imwrite( 'DEBUG_MAIN_' + str(i) + '.bmp', content_main )
            cv2.imwrite( 'DEBUG_AUX_' + str(i) + '.bmp', content_aux )
            cv2.imwrite( 'DEBUG_PROJECTION_' + str(i) + '.bmp' , result_image )
        """

        #average of the boxes of the whiteboard
        main_box = MiscHelper.averageBoxes(main_boxes)
        aux_box = MiscHelper.averageBoxes(aux_boxes)

        #store them in a single object...
        visual_alignment = VisualAlignment()
        # ... main size...
        visual_alignment.main_width = frames[0][1].shape[1]
        visual_alignment.main_height = frames[0][1].shape[0]
        #.... main board box ...
        visual_alignment.main_box = main_box
        # ... aux size ....
        visual_alignment.aux_width = aux_frames[0].shape[1]
        visual_alignment.aux_height = aux_frames[0].shape[0]
        #... aux board box...
        visual_alignment.aux_box = aux_box
        #... projection ....
        visual_alignment.projection = projection

        return visual_alignment
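
    # Usage sketch (hypothetical call site; the argument values below are
    # assumptions):
    #
    #   alignment = Aligner.computeVisualAlignment(m_videos, a_videos,
    #                                              time_offset, motionless,
    #                                              save_frames=False,
    #                                              extraction_method_id=1)
    #   homography = alignment.projection  # maps aux content onto main frames
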
def main():
    if len(sys.argv) < 2:
        print("Usage")
        print("\tpython {0:s} config".format(sys.argv[0]))
        return

    # initialization #
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    # get paths and other configuration parameters ....
    output_dir = config.get_str("OUTPUT_PATH")
    features_dir = output_dir + "/" + config.get("SPEAKER_ACTION_FEATURES_DIR")

    action_class_output_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_OUTPUT_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    os.makedirs(action_class_output_dir, exist_ok=True)
    os.makedirs(action_class_probabilities_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    # load the saved model for action classification ...
    classifier_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_DIR")
    classifier_filename = classifier_dir + "/" + config.get_str(
        "SPEAKER_ACTION_CLASSIFIER_FILENAME")
    clf = MiscHelper.dump_load(classifier_filename)

    csv_col = ['frame_start', 'frame_end', 'prediction']

    for lecture in testing_set:
        input_filename = features_dir + "/" + database.name + "_" + lecture.title + ".pickle"
        output_actions_filename = action_class_output_dir + "/" + database.name + "_" + lecture.title + ".csv"
        output_proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + lecture.title + ".csv"

        # load data ...
        data_xy = MiscHelper.dump_load(input_filename)

        # classifier prediction ....
        test_x = data_xy["features"]
        y_pred = clf.predict(test_x)
        y_pred_re = y_pred.reshape((y_pred.shape[0], 1))

        # save prediction result
        output_csv = ResultRecorder(output_actions_filename)
        output_csv.write_headers(csv_col)

        # the function accepts a list of columns to save on CSV ...
        # by transposing, we make the standard list of rows into a list of columns for the function ...
        paras = np.hstack((data_xy["frame_infos"], y_pred[:,
                                                          None])).transpose()
        output_csv.record_results(paras)

        # save label probabilities
        all_classes = clf.classes_
        y_prob = clf.predict_proba(test_x)
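        # columns of y_prob follow the order of clf.classes_, which is why
        # all_classes is appended to the CSV headers below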
        infos = np.concatenate((y_pred_re, y_prob), axis=1)
        output_csv = ResultRecorder(output_proba_filename)
        output_csv.write_headers(csv_col + all_classes.tolist())
        # ... IDEM ....
        paras = np.hstack((data_xy["frame_infos"], infos)).transpose()
        output_csv.record_results(paras)

    @staticmethod
    def ExportVideo(database,
                    lecture,
                    binary_source,
                    video_prefix,
                    invert_binary,
                    skip_interpolation=0,
                    workers=7,
                    block_size=100):
        # .... paths ...
        lecture_suffix = str(lecture.id) + ".dat"
        tempo_binary_filename = database.output_temporal + "/" + binary_source + lecture_suffix
        lecture_str = (video_prefix + "_" + database.name + "_" +
                       lecture.title.lower())
        temporary_prefix = database.output_images + "/" + lecture_str + "_"
        first_video_filename = lecture.main_videos[0]["path"]

        # load binary images
        binary_data = MiscHelper.dump_load(tempo_binary_filename)
        original_frame_times, frame_indices, frame_compressed = binary_data

        print("Generating Temporary Files")
        with ThreadPoolExecutor(max_workers=workers) as executor:
            n_blocks = int(math.ceil(frame_indices[-1] / block_size))

            pref_list = [temporary_prefix] * n_blocks
            frame_list = [frame_compressed] * n_blocks
            idx_list = [frame_indices] * n_blocks
            inv_list = [invert_binary] * n_blocks
            start_list = [idx * block_size for idx in range(n_blocks)]
            block_list = [block_size] * n_blocks
            skip_list = [skip_interpolation] * n_blocks

            total_frames = n_blocks * block_size

            for idx, _ in enumerate(
                    executor.map(KeyframeExporter.ExpandGenerateSaveTemp,
                                 pref_list, frame_list, idx_list, inv_list,
                                 start_list, block_list, skip_list)):
                prc_progress = ((idx + 1) * 100) / n_blocks
                print("-> Exporting: {0:.4f}% (Block {1:d} of {2:d})".format(
                    prc_progress, idx + 1, n_blocks),
                      end="\r",
                      flush=True)

            print("", flush=True)

        # find source sampling frames per second
        capture = cv2.VideoCapture(first_video_filename)
        video_fps = capture.get(cv2.CAP_PROP_FPS)

        source_videos_str = " ".join(
            ["-i " + video["path"] for video in lecture.main_videos])
        audio_filter_complex = " ".join([
            "[{0:d}:a:0]".format(idx + 1)
            for idx in range(len(lecture.main_videos))
        ])
        audio_filter_complex += " concat=n={0:d}:v=0:a=1 [audio]".format(
            len(lecture.main_videos))
        video_output = database.output_videos + "/" + lecture_str + ".mp4"

        input_framerate = video_fps
        output_framerate = video_fps

        video_inputs = "-hwaccel dxva2 -framerate {0:.2f} -start_number 0 -i {1:s}%d.png".format(
            input_framerate, temporary_prefix)
        audio_inputs = "{0:s} -filter_complex \"{1:s}\"".format(
            source_videos_str, audio_filter_complex)
        output_flags = "-pix_fmt yuv420p -vf \"scale=trunc(iw/2)*2:trunc(ih/2)*2\" "
        output_flags += "-s:v 1920x1080 -codec:v mpeg4 -c:v libx264 -r {0:s} -shortest".format(
            str(output_framerate))

        export_command = "ffmpeg -y {0:s} {1:s} -map 0:0 -map \"[audio]\" {2:s} {3:s}"
        export_command = export_command.format(video_inputs, audio_inputs,
                                               output_flags, video_output)

        # generate video using ffmpeg ....
        print("Saving data to: " + video_output)
        print(export_command)
        os.system(export_command)

        # delete temporary images
        print("Deleting Temporary Files")
        for idx in range(total_frames):
            os.remove(temporary_prefix + str(idx) + ".png")

# Example #22

def main():
    # usage check
    if len(sys.argv) < 2:
        print("Usage:")
        print("")
        print("\tpython {0:s} config [gt_labels]".format(sys.argv[0]))
        print("")
        print("Where")
        print("\tconfig:\tPath to AccessMath configuration file")
        print("\tgt_labels:\tuse ground truth action labels (Default= False)")
        return

    # read the configuration file ....
    config = Configuration.from_file(sys.argv[1])

    try:
        database = MetaDataDB.from_file(config.get_str("VIDEO_DATABASE_PATH"))
    except Exception:
        print("Invalid AccessMath Database file")
        return

    output_dir = config.get_str("OUTPUT_PATH")
    video_metadata_dir = output_dir + "/" + config.get_str(
        "SPEAKER_ACTION_VIDEO_META_DATA_DIR")
    action_class_probabilities_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_PROBABILITIES_DIR")
    output_bboxes_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_CLASSIFICATION_BBOXES_DIR")
    temporal_segments_dir = output_dir + "/" + config.get(
        "SPEAKER_ACTION_TEMPORAL_SEGMENTS_DIR")
    os.makedirs(temporal_segments_dir, exist_ok=True)

    dataset_name = config.get("SPEAKER_TESTING_SET_NAME")
    testing_set = database.datasets[dataset_name]

    valid_actions = config.get("SPEAKER_VALID_ACTIONS")

    if len(sys.argv) >= 3:
        use_ground_truth = int(sys.argv[2]) > 0
    else:
        use_ground_truth = False

    for current_lecture in testing_set:
        info_filename = video_metadata_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        proba_filename = action_class_probabilities_dir + "/" + database.name + "_" + current_lecture.title + ".csv"

        video_info = MiscHelper.dump_load(info_filename)

        segmenter = VideoSegmenter.FromConfig(config, video_info["width"],
                                              video_info["height"])

        # read label data ....
        prob_info = ResultReader.read_actions_probabilities_file(
            proba_filename, valid_actions)
        segments, gt_actions, pred_actions, prob_actions = prob_info

        # read bbox data ...
        bbox_filename = output_bboxes_dir + "/" + database.name + "_" + current_lecture.title + ".csv"
        frame_idxs, frame_actions, body_bboxes, rh_bboxes = ResultReader.read_bbox_file(
            bbox_filename, use_ground_truth)

        # (splits_frames, video_keyframes)
        video_data = segmenter.get_keyframes(pred_actions, segments,
                                             frame_idxs, body_bboxes,
                                             rh_bboxes)

        print("")
        print("video key_frames")
        print(video_data[0])
        print(video_data[1])
        print("")

        output_filename = temporal_segments_dir + "/" + database.name + "_" + current_lecture.title + ".pickle"
        MiscHelper.dump_save(video_data, output_filename)