Ejemplo n.º 1
0
def __get_frame_names_from_csv_file(annot_text_path,
                                    min_frames_per_video,
                                    max_frames_per_video,
                                    sampling=True):
    """
    Read the Charades annotation csv and build a dict of frame names per video.

    :param annot_text_path: path of the annotation csv file (one row per video).
    :param min_frames_per_video: lower bound used by the frame sampler.
    :param max_frames_per_video: upper bound used by the frame sampler.
    :param sampling: if True, sample the frames with __sample_frames.
    :return: dict of video_id -> list of frame names (possibly sampled).
    """
    root_path = c.data_root_path
    counts_before = []
    counts_after = []
    count = 0
    video_frames_dict = dict()

    with open(annot_text_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            # progress report every 100 rows
            if count % 100 == 0:
                print('%d' % (count))
            count += 1

            action_strings = row['actions']
            video_id = row['id']

            # skip videos with no action annotation
            if len(action_strings) == 0:
                continue

            frames_relative_root_path = 'Charades/frames/Charades_v1_rgb/%s' % (
                video_id)
            frames_root_path = '%s/%s' % (root_path, frames_relative_root_path)

            frame_names = utils.file_names(frames_root_path, nat_sorted=True)
            counts_before.append(len(frame_names))

            # if min/max frame limits are set, sample the frames
            if sampling:
                frame_names = __sample_frames(frame_names,
                                              min_frames_per_video,
                                              max_frames_per_video)

            counts_after.append(len(frame_names))

            # video id -> names of its (sampled) frames
            video_frames_dict[video_id] = frame_names

    # the two stats dumps were duplicated; one helper handles both
    __print_frame_count_stats('counts before', counts_before,
                              min_frames_per_video, max_frames_per_video)
    __print_frame_count_stats('counts after', counts_after,
                              min_frames_per_video, max_frames_per_video)

    return video_frames_dict


def __print_frame_count_stats(title, counts, min_frames_per_video,
                              max_frames_per_video):
    """
    Print min/max/average of the per-video frame counts plus how many videos
    fall below the min/max thresholds.
    """
    print(title)
    counts = np.array(counts)
    # guard: np.min/np.max raise on an empty array
    if len(counts) == 0:
        print('... no videos counted')
        return
    print(np.min(counts))
    print(np.max(counts))
    print(np.average(counts))
    print(len(np.where(counts < min_frames_per_video)[0]))
    print(len(np.where(counts < max_frames_per_video)[0]))
Ejemplo n.º 2
0
def __get_frame_names_untrimmed_from_csv_file_for_i3d(annot_text_path, n_frames_per_video):
    """
    Read the Charades annotation csv and return, for each annotated video,
    exactly n_frames_per_video frame names sampled for the i3d model.

    :param annot_text_path: path of the annotation csv file.
    :param n_frames_per_video: number of frames to sample from each video.
    :return: dict of video_id -> sampled frame names.
    """
    count = 0
    video_frames_dict = dict()
    #root_path = c.data_root_path
    root_path = '/content/'

    # count the csv lines once so progress can be reported as i/total;
    # the context manager closes the handle (the previous
    # open(...).readlines() leaked it)
    with open(annot_text_path) as f:
        n_lines = len(f.readlines())

    with open(annot_text_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            if count % 100 == 0:
                print('... %d/%d' % (count, n_lines))
            count += 1

            action_strings = row['actions']
            video_id = row['id']

            # some videos don't contain action annotation
            if len(action_strings) == 0:
                continue

            # get all frames of the video
            frames_relative_root_path = 'Charades_v1_rgb/%s' % (video_id)
            frames_root_path = '%s/%s' % (root_path, frames_relative_root_path)
            video_frame_names = utils.file_names(frames_root_path, nat_sorted=True)

            # sample from these frames
            video_frame_names = __sample_frames_for_i3d(video_frame_names, n_frames_per_video)
            n_frames = len(video_frame_names)
            assert n_frames == n_frames_per_video

            video_frames_dict[video_id] = video_frame_names

    return video_frames_dict
Ejemplo n.º 3
0
def _800_prepare_video_frames_path_dict():
    """
    Build and pickle a dict that maps each EPIC-Kitchens video id to the
    relative paths of its rgb frames.
    """
    dict_out_path = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_dict.pkl')
    splits_path = Pth(
        'EPIC-Kitchens/annotations/video_names_splits.pkl')
    frames_root = Pth('EPIC-Kitchens/frames_rgb_resized/train')

    # train + test video names, concatenated
    names_tr, names_te = utils.pkl_load(splits_path)
    all_video_names = np.hstack((names_tr, names_te))

    relative_pathes_dict = {}
    total = len(all_video_names)
    for idx, vid in enumerate(all_video_names):
        utils.print_counter(idx, total)

        # video id is '<person>_<...>'; frames live under person/video
        person = vid.split('_')[0]
        frames_dir = '%s/%s/%s' % (frames_root, person, vid)
        frame_names = np.array(
            utils.file_names(frames_dir, is_nat_sort=True))
        relative_pathes_dict[vid] = np.array(
            ['%s/%s/%s' % (person, vid, name) for name in frame_names])

    utils.pkl_dump(relative_pathes_dict, dict_out_path)
Ejemplo n.º 4
0
def __sample_frames(video_relative_pathes, n_frames_per_video, model_type):
    """
    Sample a fixed number of frame names per video, using the sampler that
    matches the given model type.

    :param video_relative_pathes: relative paths of the videos to process.
    :param n_frames_per_video: number of frames to sample from each video.
    :param model_type: one of 'resnet', 'i3d', 'non_local'.
    :return: dict of video_id -> sampled frame names.
    """
    video_frames_dict = dict()
    n_videos = len(video_relative_pathes)

    # dispatch table: one sampler per supported backbone. The old
    # if/elif/else chain ended in an unreachable `raise` (with a typo'd
    # message) because this assert already guarantees membership.
    samplers = {
        'resnet': __sample_frames_for_resnet,
        'i3d': __sample_frames_for_i3d,
        'non_local': __sample_frames_for_non_local,
    }
    assert model_type in samplers
    sampler = samplers[model_type]

    for idx_video, video_relative_path in enumerate(video_relative_pathes):
        utils.print_counter(idx_video, n_videos, 100)
        video_id = __video_relative_path_to_video_id(video_relative_path)

        # get all frames of the video
        frames_root_path = Pth('Breakfast/frames/%s', (video_id, ))
        video_frame_names = utils.file_names(frames_root_path,
                                             is_nat_sort=True)

        # sample from these frames
        video_frame_names = sampler(video_frame_names, n_frames_per_video)
        n_frames = len(video_frame_names)
        assert n_frames == n_frames_per_video

        video_frames_dict[video_id] = video_frame_names

    return video_frames_dict
Ejemplo n.º 5
0
def __get_frames_names_in_given_duration(video_id, start_time_in_sec, stop_time_in_sec):
    """
    For a given video_id with start and stop time in seconds, get the relative pathes of the related frames.
    """
    # directory that holds all rgb frames of this video
    frames_dir = '%s/%s' % (c.data_root_path,
                            'Charades/frames/Charades_v1_rgb/%s' % (video_id))
    all_frame_names = utils.file_names(frames_dir, nat_sorted=True)

    # map the time interval to an (inclusive) frame-index interval
    first_idx = __convert_seconds_to_frame_idx(start_time_in_sec)
    last_idx = __convert_seconds_to_frame_idx(stop_time_in_sec)

    return all_frame_names[first_idx:last_idx + 1]
Ejemplo n.º 6
0
def __get_frame_names_untrimmed_from_csv_file_for_ordered(
        annot_text_path, n_frames_per_video, is_resnet=False):
    """
    Read the Charades annotation csv and return, for each annotated video,
    exactly n_frames_per_video ordered frame names.

    :param annot_text_path: path of the annotation csv file.
    :param n_frames_per_video: number of frames to sample from each video.
    :param is_resnet: if True, use the resnet-specific ordered sampler.
    :return: dict of video_id -> sampled frame names.
    """
    counts = []
    count = 0
    video_frames_dict = dict()

    root_path = c.data_root_path

    # count the csv lines once for progress reporting; the context manager
    # closes the handle (the previous open(...).readlines() leaked it)
    with open(annot_text_path) as f:
        n_lines = len(f.readlines())

    with open(annot_text_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            if count % 100 == 0:
                print('... %d/%d' % (count, n_lines))
            count += 1

            action_strings = row['actions']

            # if not action in the current video
            if len(action_strings) == 0:
                continue

            video_id = row['id']
            frames_root_path = '%s/charades/frames/Charades_v1_rgb/%s' % (
                root_path, video_id)
            video_frame_names = utils.file_names(frames_root_path,
                                                 nat_sorted=True)

            # sample a fixed number of frames, keeping temporal order
            if is_resnet:
                video_frame_names = __sample_frames_ordered_for_resnet(
                    video_frame_names, n_frames_per_video)
            else:
                video_frame_names = __sample_frames_ordered(
                    video_frame_names, n_frames_per_video)
            n_frames = len(video_frame_names)
            assert n_frames == n_frames_per_video
            counts.append(n_frames)

            video_frames_dict[video_id] = video_frame_names

    # NOTE(review): these counts are collected *after* sampling, so the
    # 'counts before' label is misleading and — given the assert above —
    # every value equals n_frames_per_video.
    counts = np.array(counts)
    print('counts before')
    print(np.min(counts))
    print(np.max(counts))
    print(np.average(counts))
    print(len(np.where(counts < n_frames_per_video)[0]))

    return video_frames_dict
Ejemplo n.º 7
0
def __get_frame_names_untrimmed_from_csv_file_for_i3d(annot_text_path,
                                                      n_frames_per_video):
    '''
    Read the Charades annotation csv and return the dict of sampled frames
    for the i3d model.

    :param annot_text_path: e.g. './data/Charades/annotation/Charades_v1_train.csv'
    :param n_frames_per_video: e.g. 256
    :return: dict of video_id -> sampled frame names.
    '''
    count = 0
    video_frames_dict = dict()

    # count the csv lines once for progress reporting; the context manager
    # closes the handle (the previous open(...).readlines() leaked it)
    with open(annot_text_path) as f:
        n_lines = len(f.readlines())

    with open(annot_text_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            if count % 100 == 0:
                print('... %d/%d' % (count, n_lines))
            count += 1

            action_strings = row['actions']  # time annotations of the actions
            video_id = row['id']  # id of the video

            # some videos don't contain action annotation
            if len(action_strings) == 0:
                continue

            # NOTE(review): this path is hard-coded to one machine and
            # ignores c.data_root_path — consider building it from the
            # configured root instead.
            frames_relative_root_path = '/home/r/renpengzhen/Datasets/Charades/Charades_v1_rgb/%s' % (
                video_id)
            # frames_root_path = '%s/%s' % (root_path, frames_relative_root_path)
            # names of all the frame images (i.e. video frames) of this video
            video_frame_names = utils.file_names(frames_relative_root_path,
                                                 is_nat_sort=True)

            # sample from these frames
            video_frame_names = __sample_frames_for_i3d(
                video_frame_names, n_frames_per_video)
            n_frames = len(video_frame_names)
            assert n_frames == n_frames_per_video

            # store the sampled frame names in the dict
            video_frames_dict[video_id] = video_frame_names

    return video_frames_dict
Ejemplo n.º 8
0
def __count_time_in_each_video(is_training=True):
    """
    Scan the Charades annotation csv and print statistics about how many
    frames and how much time each action instance / video covers.

    :param is_training: if True use the train csv, else the test csv.
    """
    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (
        root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (
        root_path)

    annot_text_path = annot_tr_text_path if is_training else annot_te_text_path
    frames_per_instance = []
    frames_per_videos = []
    time_per_videos = []

    count = 0

    with open(annot_text_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            if count % 100 == 0:
                print('%d' % (count))
            count += 1

            action_strings = row['actions']
            # each action is '<class> <start_sec> <end_sec>', ';'-separated
            action_strings_splits = action_strings.split(';')
            video_id = row['id']

            if len(action_strings) == 0:
                print('... no action for video %s' % (video_id))
                continue

            frames_relative_root_path = 'Charades/frames/Charades_v1_rgb/%s' % (
                video_id)
            frames_root_path = '%s/%s' % (root_path, frames_relative_root_path)
            frame_names = utils.file_names(frames_root_path, nat_sorted=True)

            frames_per_video = 0
            time_per_video = 0

            for action_st in action_strings_splits:
                action_splits = action_st.split(' ')
                action_start = action_splits[1]
                action_end = action_splits[2]

                action_time = float(action_end) - float(action_start)
                time_per_video += action_time

                idx_start = __convert_seconds_to_frame_idx(action_start)
                idx_stop = __convert_seconds_to_frame_idx(action_end)
                # BUGFIX: slice into a new variable. The original overwrote
                # frame_names with its own slice, so every subsequent action
                # of the same video sliced an already-shrunk list and its
                # frame count was wrong.
                instance_frame_names = frame_names[idx_start:idx_stop + 1]

                n_frames_per_instance = len(instance_frame_names)
                frames_per_video += n_frames_per_instance
                frames_per_instance.append(n_frames_per_instance)

            time_per_videos.append(time_per_video)
            frames_per_videos.append(frames_per_video)

    print(frames_per_instance)
    print(len(frames_per_instance))
    print(np.sum(frames_per_instance))
    print(np.average(frames_per_instance))
    print(np.min(frames_per_instance))
    print(np.max(frames_per_instance))

    print(frames_per_videos)
    print(len(frames_per_videos))
    print(np.sum(frames_per_videos))
    print(np.average(frames_per_videos))
    print(np.min(frames_per_videos))
    print(np.max(frames_per_videos))

    print(time_per_videos)
    print(len(time_per_videos))
    print(np.sum(time_per_videos))
    print(np.average(time_per_videos))
    print(np.min(time_per_videos))
    print(np.max(time_per_videos))

    print(count)
Ejemplo n.º 9
0
def _101_prepare_action_ids():
    """
    Get list of all unit-actions and activities
    :return:
    """

    video_types = ['cam', 'webcam', 'stereo']

    videos_root_path = Pth('Breakfast/videos')
    unit_actions_path = Pth('Breakfast/annotation/unit_actions_list.pkl')
    activities_path = Pth('Breakfast/annotation/activities_list.pkl')

    person_names = utils.folder_names(videos_root_path, is_nat_sort=True)

    unit_actions = []
    activities = []

    # loop on persons
    for person_name in person_names:
        p_video_root_path = '%s/%s' % (videos_root_path, person_name)

        p_video_types = [
            n for n in utils.folder_names(p_video_root_path)
            if __check_correct_video_type(video_types, n)
        ]
        p_video_types = np.array(p_video_types)

        # loop on videos for each person
        for p_video_type in p_video_types:
            # get file names
            instance_video_root_path = '%s/%s' % (p_video_root_path,
                                                  p_video_type)
            instance_video_names = utils.file_names(instance_video_root_path,
                                                    is_nat_sort=True)

            # if stereo videos, consider only the first channel
            instance_video_names = [
                n for n in instance_video_names
                if utils.get_file_extension(n) == 'avi' and (
                    'stereo' not in p_video_type or 'ch0' in n)
            ]

            # append relative pathes of videos
            instance_video_relative_pathes = [
                'Breakfast/videos/%s/%s/%s' % (person_name, p_video_type, n)
                for n in instance_video_names
            ]

            # also, get ground truth for unit-actions and activities
            instance_annot_file_pathes = [
                '%s/%s.txt' %
                (instance_video_root_path, utils.remove_extension(n))
                for n in instance_video_names
            ]
            instance_unit_actions = __get_action_names_from_files(
                instance_annot_file_pathes)
            instance_activities = [
                utils.remove_extension(n).split('_')[1]
                for n in instance_video_relative_pathes
            ]

            unit_actions += instance_unit_actions
            activities += instance_activities

    activities = np.unique(activities)
    activities = natsort.natsorted(activities)
    activities = np.array(activities)

    unit_actions = np.unique(unit_actions)
    unit_actions = natsort.natsorted(unit_actions)
    unit_actions = np.array(unit_actions)

    # BUGFIX: these were Python 2 print statements (a syntax error on
    # Python 3); converted to the print() calls used elsewhere in the file
    print('%d %d' % (len(activities), len(unit_actions)))
    print(activities)
    print(unit_actions)

    utils.pkl_dump(unit_actions, unit_actions_path)
    utils.pkl_dump(activities, activities_path)

    # also dump plain-text copies of both lists
    unit_actions_path = Pth('Breakfast/annotation/unit_actions_list.txt')
    activities_path = Pth('Breakfast/annotation/activities_list.txt')
    utils.txt_dump(unit_actions, unit_actions_path)
    utils.txt_dump(activities, activities_path)
Ejemplo n.º 10
0
def _102_prepare_video_annot():
    """
    Check ground truth of each video.
    :return:
    """

    video_types = ['cam', 'webcam', 'stereo']

    videos_root_path = Pth('Breakfast/videos')
    unit_actions_path = Pth('Breakfast/annotation/unit_actions_list.pkl')
    activities_path = Pth('Breakfast/annotation/activities_list.pkl')

    annot_unit_actions_path = Pth(
        'Breakfast/annotation/annot_unit_actions.pkl')
    annot_activities_path = Pth('Breakfast/annotation/annot_activities.pkl')

    unit_actions = utils.pkl_load(unit_actions_path)
    activities = utils.pkl_load(activities_path)
    person_names = utils.folder_names(videos_root_path, is_nat_sort=True)

    # person-level train/test split: first 85% of persons are train
    split_ratio = 0.85
    video_relative_pathes_tr = []
    video_relative_pathes_te = []

    y_unit_actions_tr = []
    y_unit_actions_te = []

    y_activities_tr = []
    y_activities_te = []

    n_persons = len(person_names)
    n_persons_tr = int(n_persons * split_ratio)
    person_names_tr = person_names[:n_persons_tr]
    person_names_te = person_names[n_persons_tr:]

    # loop on persons
    for person_name in person_names:
        p_video_root_path = '%s/%s' % (videos_root_path, person_name)

        p_video_types = [
            n for n in utils.folder_names(p_video_root_path)
            if __check_correct_video_type(video_types, n)
        ]
        p_video_types = np.array(p_video_types)

        # loop on videos for each person
        for p_video_type in p_video_types:
            # get file names
            instance_video_root_path = '%s/%s' % (p_video_root_path,
                                                  p_video_type)
            instance_video_names = utils.file_names(instance_video_root_path,
                                                    is_nat_sort=True)

            # if stereo videos, consider only the first channel
            instance_video_names = [
                n for n in instance_video_names
                if utils.get_file_extension(n) == 'avi' and (
                    'stereo' not in p_video_type or 'ch0' in n)
            ]

            # append relative pathes of videos
            instance_video_relative_pathes = [
                '%s/%s/%s' % (person_name, p_video_type, n)
                for n in instance_video_names
            ]

            # also, get ground truth for unit-actions and activities
            instance_activities_y, instance_unit_actions_y = __get_gt_activities_and_actions(
                instance_video_root_path, instance_video_names, activities,
                unit_actions)

            if person_name in person_names_tr:
                video_relative_pathes_tr += instance_video_relative_pathes
                y_unit_actions_tr += instance_unit_actions_y
                y_activities_tr += instance_activities_y
            else:
                video_relative_pathes_te += instance_video_relative_pathes
                y_unit_actions_te += instance_unit_actions_y
                y_activities_te += instance_activities_y

    video_relative_pathes_tr = np.array(video_relative_pathes_tr)
    video_relative_pathes_te = np.array(video_relative_pathes_te)

    y_activities_tr = np.array(y_activities_tr)
    y_activities_te = np.array(y_activities_te)

    y_unit_actions_tr = np.array(y_unit_actions_tr)
    y_unit_actions_te = np.array(y_unit_actions_te)

    # BUGFIX: these were Python 2 print statements (a syntax error on
    # Python 3); converted to the print() calls used elsewhere in the file
    print(video_relative_pathes_tr.shape)
    print(video_relative_pathes_te.shape)

    print(y_activities_tr.shape)
    print(y_activities_te.shape)

    print(y_unit_actions_tr.shape)
    print(y_unit_actions_te.shape)

    # finally, save video annotation ()
    annot_unit_action = (video_relative_pathes_tr, y_unit_actions_tr,
                         video_relative_pathes_te, y_unit_actions_te)
    annot_activities = (video_relative_pathes_tr, y_activities_tr,
                        video_relative_pathes_te, y_activities_te)
    utils.pkl_dump(annot_unit_action, annot_unit_actions_path)
    utils.pkl_dump(annot_activities, annot_activities_path)

    return