def __init__(self, n_timesteps, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False, is_binarize_output=False):
        """
        :param n_timesteps:  How many timesteps per video.
        :param is_random_tr: Sample random or uniform frames.
        :param is_random_te: Sample random or uniform frames.
        :param is_shuffle_tr: To shuffle data or not.
        :param is_shuffle_te: To shuffle data or not.
        """

        self.__is_random_tr = is_random_tr
        self.__is_random_te = is_random_te
        self.__is_shuffle_tr = is_shuffle_tr
        self.__is_shuffle_te = is_shuffle_te
        self.__n_timesteps = n_timesteps

        self.__n_frames_per_segment = 8
        self.__n_frames = self.__n_timesteps * self.__n_frames_per_segment

        gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
        frames_dict_path = Pth('Breakfast/annotation/frames_dict.pkl')

        (self.__video_frames_dict_tr, self.__video_frames_dict_te) = utils.pkl_load(frames_dict_path)
        (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(gt_activities_path)

        if is_binarize_output:
            classes = np.arange(0, 10)
            self.__y_tr = utils.binarize_label(self.__y_tr, classes)
            self.__y_te = utils.binarize_label(self.__y_te, classes)
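`utils.binarize_label` is not shown in this listing; a minimal sketch, assuming it one-hot encodes integer labels over the given class set (the behavior and signature are assumptions, not the author's code):

import numpy as np

def binarize_label(y, classes):
    # one-hot encode integer labels over the given class set (assumed behavior)
    y = np.asarray(y).astype(np.int32)
    y_bin = np.zeros((len(y), len(classes)), dtype=np.float32)
    y_bin[np.arange(len(y)), np.searchsorted(classes, y)] = 1.0
    return y_bin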
Example #2
def _05_visualize_attention_values():
    # load data
    n_timesteps = 64
    n_centroids = 128

    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    frames_annot_path = Pth('Breakfast/annotation/annot_frames_i3d_%d.pkl', (512,))
    attention_values_path = Pth('Breakfast/qualitative_results/node_attention_%s.pkl', (model_name,))

    n_classes = ds_breakfast.N_CLASSES_ACTIVITIES
    frames_annot = utils.pkl_load(frames_annot_path)
    (video_ids_tr, y_tr), (video_ids_te, y_te) = utils.pkl_load(gt_activities_path)
    y_tr = utils.debinarize_label(y_tr)
    y_te = utils.debinarize_label(y_te)

    (att_tr, att_te) = utils.pkl_load(attention_values_path)  # (1357, 64, 128), (355, 64, 128)

    attentions_tr = np.array([np.average(att_tr[np.where(y_tr == idx_class)[0]], axis=(0, 1)) for idx_class in range(n_classes)])  # (10, 128)
    attentions_te = np.array([np.average(att_te[np.where(y_te == idx_class)[0]], axis=(0, 1)) for idx_class in range(n_classes)])  # (10, 128)

    # remove least attended centroids
    all_attn_vals = np.mean(attentions_tr, axis=1)

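The snippet above is cut off after computing `all_attn_vals`; a plausible continuation for "remove least attended centroids" (entirely an assumption: the cutoff k and the ranking axis are guesses, not the author's code) might be:

k = 32  # hypothetical cutoff
centroid_means = np.mean(attentions_tr, axis=0)  # (128,) mean attention per centroid
idx_top = np.argsort(centroid_means)[::-1][:k]   # the k most-attended centroids
attentions_tr = attentions_tr[:, idx_top]        # (10, k)
attentions_te = attentions_te[:, idx_top]        # (10, k)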
    def __init__(self, n_timesteps, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False):
        """
        :param n_timesteps:  How many timesteps per video.
        :param is_random_tr: Sample random or uniform frames.
        :param is_random_te: Sample random or uniform frames.
        :param is_shuffle_tr: To shuffle data or not.
        :param is_shuffle_te: To shuffle data or not.
        """

        frames_dict_path = Pth('Charades/annotation/frames_dict_all_frames.pkl')
        annotation_path = Pth('Charades/annotation/video_annotation.pkl')

        self.__is_random_tr = is_random_tr
        self.__is_random_te = is_random_te
        self.__is_shuffle_tr = is_shuffle_tr
        self.__is_shuffle_te = is_shuffle_te
        self.__n_timesteps = n_timesteps

        self.__n_frames_per_segment = 8
        self.__n_frames = self.__n_timesteps * self.__n_frames_per_segment

        (self.__video_frames_dict_tr, self.__video_frames_dict_te) = utils.pkl_load(frames_dict_path)
        (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(annotation_path)

        self.current_train = None
        self.current_test = None
Example #4
def _804_random_sample_frames_for_i3d_test_video_level_by_split(
        split_type='train'):

    assert split_type in ['train', 'test'], 'Sorry, unknown split type: %s' % (split_type)
    is_train = split_type == 'train'

    file_name_suffix = 'tr' if is_train else 'te'
    root_path_id = 0 if configs.is_local_machine() else 5

    frames_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train',
                           root_type=c.ROOT_PATH_TYPES[root_path_id])
    frame_relative_pathes_dict_path = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_dict_%s.pkl',
        (file_name_suffix, ))
    video_names_splits_path = Pth(
        'EPIC-Kitchens/annotations/video_names_splits.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    video_names = video_names_tr if is_train else video_names_te
    del video_names_tr
    del video_names_te

    frame_relative_pathes_dict = utils.pkl_load(
        frame_relative_pathes_dict_path)

    # loop on the videos and sample frames for i3d
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    sampled_frames = __random_sample_frames_per_video_for_i3d(
        video_names, frames_root_path, frame_relative_pathes_dict,
        n_frames_per_segment, n_frames_per_video)

    return sampled_frames
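`__random_sample_frames_per_video_for_i3d` is defined elsewhere in the module; a minimal sketch of random snippet sampling under the usual I3D convention (segments of consecutive frames at random, sorted start offsets; the name, signature, and exact scheme are assumptions):

import numpy as np

def random_sample_snippets(frame_names, n_frames_per_segment, n_frames_per_video):
    # sample n_segments snippets of consecutive frames at random start offsets
    n_segments = n_frames_per_video // n_frames_per_segment
    max_start = max(1, len(frame_names) - n_frames_per_segment + 1)
    starts = np.sort(np.random.randint(0, max_start, size=n_segments))
    sampled = []
    for s in starts:
        sampled += list(frame_names[s:s + n_frames_per_segment])
    return sampled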
Example #5
def _802_uniform_sample_frames_for_i3d_test_video_level():
    video_names_splits_path = Pth(
        'EPIC-Kitchens/annotations/video_names_splits.pkl')
    frame_relative_pathes_dict_tr_path = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_dict_te.pkl')
    sampled_frames_relative_pathes = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_uniform_sample.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frame_relative_pathes_dict_tr = utils.pkl_load(
        frame_relative_pathes_dict_tr_path)
    frame_relative_pathes_dict_te = utils.pkl_load(
        frame_relative_pathes_dict_te_path)

    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    sampled_frames_tr = __uniform_sample_frames_per_video_for_i3d(
        video_names_tr, frame_relative_pathes_dict_tr, n_frames_per_segment,
        n_frames_per_video)
    sampled_frames_te = __uniform_sample_frames_per_video_for_i3d(
        video_names_te, frame_relative_pathes_dict_te, n_frames_per_segment,
        n_frames_per_video)

    data = (sampled_frames_tr, sampled_frames_te)
    utils.pkl_dump(data, sampled_frames_relative_pathes)
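Similarly, `__uniform_sample_frames_per_video_for_i3d` is not shown; a sketch of uniform snippet sampling (evenly spaced segment starts, consecutive frames within each segment; an assumption about the helper's behavior):

import numpy as np

def uniform_sample_snippets(frame_names, n_frames_per_segment, n_frames_per_video):
    # evenly spaced snippets of consecutive frames across the video
    n_segments = n_frames_per_video // n_frames_per_segment
    max_start = max(1, len(frame_names) - n_frames_per_segment + 1)
    starts = np.linspace(0, max_start - 1, n_segments).astype(np.int32)
    sampled = []
    for s in starts:
        sampled += list(frame_names[s:s + n_frames_per_segment])
    return sampled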
Example #6
def _803_random_sample_frames_for_i3d_test_video_level():
    video_names_splits_path = Pth(
        'EPIC-Kitchens/annotations/video_names_splits.pkl')
    frame_relative_pathes_dict_tr_path = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_dict_te.pkl')

    root_path_id = 0 if configs.is_local_machine() else 5
    frames_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train',
                           root_type=c.ROOT_PATH_TYPES[root_path_id])

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frame_relative_pathes_dict_tr = utils.pkl_load(
        frame_relative_pathes_dict_tr_path)
    frame_relative_pathes_dict_te = utils.pkl_load(
        frame_relative_pathes_dict_te_path)

    # loop on the videos and sample frames for i3d

    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    sampled_frames_tr = __random_sample_frames_per_video_for_i3d(
        video_names_tr, frames_root_path, frame_relative_pathes_dict_tr,
        n_frames_per_segment, n_frames_per_video)
    sampled_frames_te = __random_sample_frames_per_video_for_i3d(
        video_names_te, frames_root_path, frame_relative_pathes_dict_te,
        n_frames_per_segment, n_frames_per_video)

    return (sampled_frames_tr, sampled_frames_te)
    def __init__(self, n_timesteps, n_timesteps_total, featurenet_type, x_heavy_path, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False):
        """
        :param n_timesteps:  How many timesteps per video.
        :param is_random_tr: Sample random or uniform frames.
        :param is_random_te: Sample random or uniform frames.
        :param is_shuffle_tr: To shuffle data or not.
        :param is_shuffle_te: To shuffle data or not.
        """

        self.__is_random_tr = is_random_tr
        self.__is_random_te = is_random_te
        self.__is_shuffle_tr = is_shuffle_tr
        self.__is_shuffle_te = is_shuffle_te
        self.__n_timesteps = n_timesteps
        self.__n_timesteps_total = n_timesteps_total

        n_frames_per_segment = utils.get_model_n_frames_per_segment(featurenet_type)
        n_frames = n_timesteps_total * n_frames_per_segment

        gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
        frames_annot_path = Pth('Breakfast/annotation/annot_frames_%s_%d.pkl', (featurenet_type, n_frames,))

        (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(gt_activities_path)

        (x_heavy_tr, x_heavy_te) = utils.h5_load_multi(x_heavy_path, ['x_tr', 'x_te'])  # (B, C, T, H, W)
        self.__x_heavy_tr = x_heavy_tr
        self.__x_heavy_te = x_heavy_te

        # select middle frame from each snippet
        (frames_dict_tr, frames_dict_te) = utils.pkl_load(frames_annot_path)
        frames_dict_tr = self.__select_middle_frame(frames_dict_tr, n_frames_per_segment)
        frames_dict_te = self.__select_middle_frame(frames_dict_te, n_frames_per_segment)
        self.__frames_dict_tr = frames_dict_tr
        self.__frames_dict_te = frames_dict_te
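`__select_middle_frame` is private to this class; given how it is called, a plausible implementation (assumed, not the author's) keeps the middle frame of every consecutive segment:

def select_middle_frame(frames_dict, n_frames_per_segment):
    # keep only the middle frame of each n_frames_per_segment-long segment
    offset = n_frames_per_segment // 2
    return {video_id: frames[offset::n_frames_per_segment]
            for video_id, frames in frames_dict.items()}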
Example #8
def _703_prepare_data_splits():
    """
    Sample frame paths for the i3d model.
    :return:
    """

    annot_dict_path = Pth(
        'EPIC-Kitchens/annotations/EPIC_train_action_labels_dict.pkl')
    annot_idxes_many_shots_path = Pth(
        'EPIC-Kitchens/annotations/annot_idxes_many_shots_noun_verb.pkl')
    video_names_splits_path = Pth(
        'EPIC-Kitchens/annotations/video_names_splits.pkl')

    annot_idxes_many_shots = utils.pkl_load(annot_idxes_many_shots_path)
    annot_dict = utils.pkl_load(annot_dict_path)

    # split_ratio
    split_ratio = 0.8
    person_videos_dict = {}

    # first loop to collect all unique video ids
    for annot_id in annot_idxes_many_shots:
        annot_line = annot_dict[annot_id]
        person_id = annot_line[0]
        video_id = annot_line[1]
        if person_id not in person_videos_dict:
            person_videos_dict[person_id] = []

        person_videos_dict[person_id].append(video_id)

    for person_id in person_videos_dict:
        video_names = natsort.natsorted(
            np.unique(person_videos_dict[person_id]))
        person_videos_dict[person_id] = video_names

    # now that we have collected the persons and their videos, see how many videos we get if we split
    video_names_tr = []
    video_names_te = []

    for person_id in person_videos_dict:
        v_names = person_videos_dict[person_id]
        idx = int(len(v_names) * split_ratio)
        v_names_tr = v_names[:idx]
        v_names_te = v_names[idx:]
        video_names_tr += v_names_tr
        video_names_te += v_names_te

    video_names_tr = np.array(video_names_tr)
    video_names_te = np.array(video_names_te)

    print(len(video_names_tr) + len(video_names_te))
    print(len(video_names_tr))
    print(len(video_names_te))

    # save video names
    utils.pkl_dump((video_names_tr, video_names_te), video_names_splits_path)
    def __read_features(self, params):

        features_path, idx_feature, idx_timestep = params
        frame_feature = utils.pkl_load(features_path)

        # add current feature to the list
        self.__features[idx_feature, idx_timestep] = frame_feature
    def __load_features(self, params):

        idx_video = params[0]
        feats_path = params[1]

        try:
            # load feature from file
            feats = utils.pkl_load(feats_path)
            n_feats = len(feats)

            if self.__feat_map_side_dim == 1:
                feats = np.expand_dims(feats, 1)
                feats = np.expand_dims(feats, 1)

            # some videos have fewer frames than required; take all features
            # and zero-pad (or maybe repeat frames?) until the required count is reached
            if n_feats < self.__n_frames_per_video:
                feats = self.__pad_video_feats(feats)
            else:
                # randomly sample only n frames
                idx_feats = np.arange(n_feats)
                np.random.shuffle(idx_feats)
                idx_feats = idx_feats[:self.__n_frames_per_video]
                feats = feats[idx_feats]

            assert len(feats) == self.__n_frames_per_video

            # as float
            feats = feats.astype(np.float32)
            self.__batch_features[idx_video] = feats
        except Exception as exp:
            print('\nSorry, error in loading feature %s' % (feats_path))
            print(exp)
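`__pad_video_feats` is referenced but not shown; a minimal zero-padding sketch consistent with the comment above (the name and signature are assumptions, and `feats` is assumed to be an ndarray):

import numpy as np

def pad_video_feats(feats, n_frames_per_video):
    # zero-pad along the time axis up to the required frame count
    n_missing = n_frames_per_video - len(feats)
    padding = np.zeros((n_missing,) + feats.shape[1:], dtype=feats.dtype)
    return np.concatenate([feats, padding], axis=0)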
Example #11
def _800_prepare_video_frames_path_dict():
    frame_relative_pathes_dict_path = Pth(
        'EPIC-Kitchens/annotations/frame_relative_pathes_dict.pkl')
    video_names_splits_path = Pth(
        'EPIC-Kitchens/annotations/video_names_splits.pkl')
    imgs_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    video_names = np.hstack((video_names_tr, video_names_te))

    frame_relative_pathes_dict = {}
    n_videos = len(video_names)
    for idx, video_id in enumerate(video_names):
        utils.print_counter(idx, n_videos)

        person_id = video_id.split('_')[0]
        video_frames_root_path = '%s/%s/%s' % (imgs_root_path, person_id,
                                               video_id)
        video_frames_names = utils.file_names(video_frames_root_path,
                                              is_nat_sort=True)
        video_frames_names = np.array(video_frames_names)
        video_frames_relative_pathes = np.array([
            '%s/%s/%s' % (person_id, video_id, n) for n in video_frames_names
        ])
        frame_relative_pathes_dict[video_id] = video_frames_relative_pathes

    utils.pkl_dump(frame_relative_pathes_dict, frame_relative_pathes_dict_path)
Example #12
def __count_how_many_videos_per_class():
    root_path = c.data_root_path
    annotation_path = '%s/Charades/annotation/video_annotation.pkl' % (
        root_path)
    (video_id_tr, y_tr, video_id_te, y_te) = utils.pkl_load(annotation_path)
    n_classes = N_CLASSES

    counts_tr = []
    counts_te = []

    for i in range(n_classes):
        counts_tr.append(len(np.where(y_tr == i + 1)[0]))
        counts_te.append(len(np.where(y_te == i + 1)[0]))

    counts_tr = np.array(counts_tr)
    counts_te = np.array(counts_te)

    idx = np.argsort(counts_tr)[::-1]
    counts_tr = counts_tr[idx]
    counts_te = counts_te[idx]

    counts = np.array([counts_tr, counts_te])
    print(counts_tr)
    print(counts_te)
    utils.plot_multi(counts, title='Counts')
Example #13
def _704_prepare_many_shots_noun_verb_action_ids():
    """
    Prepare two dicts of nouns and verbs to convert from original id to many_shot id. All ids are zero-indexed.
    71 noun classes
    26 verb classes
    xx actions
    :return:
    """

    annot_dict_path = Pth(
        'EPIC-Kitchens/annotations/EPIC_train_action_labels_dict.pkl')
    annot_idxes_many_shots_path = Pth(
        'EPIC-Kitchens/annotations/annot_idxes_many_shots_noun_verb.pkl')
    noun_ids_many_shots_dict_path = Pth(
        'EPIC-Kitchens/annotations/noun_ids_many_shots_dict.pkl')
    verb_ids_many_shots_dict_path = Pth(
        'EPIC-Kitchens/annotations/verb_ids_many_shots_dict.pkl')
    actn_ids_many_shots_dict_path = Pth(
        'EPIC-Kitchens/annotations/actn_ids_many_shots_dict.pkl')
    actn_ids_many_shots_list_path = Pth(
        'EPIC-Kitchens/annotations/EPIC_many_shot_actions.csv')

    annot_idxes_many_shots = utils.pkl_load(annot_idxes_many_shots_path)
    annot_dict = utils.pkl_load(annot_dict_path)

    # get all verb_ids, noun_ids
    noun_ids = [
        annot_dict[annot_id][10] for annot_id in annot_idxes_many_shots
    ]
    verb_ids = [annot_dict[annot_id][8] for annot_id in annot_idxes_many_shots]
    actn_ids = __get_action_ids_from_annotation(actn_ids_many_shots_list_path)

    noun_ids = np.sort(np.unique(noun_ids))
    verb_ids = np.sort(np.unique(verb_ids))

    n_nouns = len(noun_ids)
    n_verbs = len(verb_ids)
    n_actns = len(actn_ids)

    # these dictionaries get the id of many_shot (noun or verb) given the original (noun or verb)
    many_shot_noun_ids_dict = dict(zip(noun_ids, np.arange(n_nouns)))
    many_shot_verb_ids_dict = dict(zip(verb_ids, np.arange(n_verbs)))
    many_shot_actn_ids_dict = dict(zip(actn_ids, np.arange(n_actns)))

    utils.pkl_dump(many_shot_noun_ids_dict, noun_ids_many_shots_dict_path)
    utils.pkl_dump(many_shot_verb_ids_dict, verb_ids_many_shots_dict_path)
    utils.pkl_dump(many_shot_actn_ids_dict, actn_ids_many_shots_dict_path)
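These dicts remap the sparse original ids onto contiguous zero-based many-shot ids; hypothetical usage (the variable names here are illustrative only):

# map an original noun id to its contiguous many-shot id
noun_ids_dict = utils.pkl_load(noun_ids_many_shots_dict_path)
original_noun_id = noun_ids[0]
many_shot_id = noun_ids_dict[original_noun_id]  # in [0, n_nouns)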
Example #14
    def __init__(self,
                 batch_size,
                 n_classes,
                 feature_dim,
                 feature_name,
                 is_training,
                 is_shuffle=True):
        """
        Initialization
        """

        self.batch_size = batch_size
        self.is_training = is_training
        self.n_classes = n_classes
        self.feature_dim = feature_dim
        self.feature_name = feature_name
        # self.is_shuffle = is_shuffle
        self.dataset_name = 'charades'

        # load annotation
        root_path = './data/charades'
        annotation_path = '%s/annotation/video_annotation_py3.pkl' % (
            root_path)
        if self.is_training:
            (video_names, y, _, _) = utils.pkl_load(annotation_path)
            print(video_names)
        else:
            (_, _, video_names, y) = utils.pkl_load(annotation_path)

        # in case of single label classification, debinarize the labels
        if config.cfg.MODEL.CLASSIFICATION_TYPE == 'sl':
            y = utils.debinarize_label(y)

        # in any case, make sure target is float
        y = y.astype(np.float32)

        # convert relative to root pathes
        feats_path = np.array([
            '%s/%s/%s.pkl' % (root_path, feature_name, p) for p in video_names
        ])

        n_samples = len(y)
        self.n_samples = n_samples
        self.n_batches = utils.calc_num_batches(n_samples, batch_size)
        self.feats_path = feats_path
        self.y = y
Example #15
def _105_prepare_action_gt_timestamped():
    """
    Get ground truth of unit-actions with their timestamps.
    :return:
    """
    root_path = c.DATA_ROOT_PATH
    video_ids_path = Pth('Breakfast/annotation/video_ids_split.pkl')
    unit_actions_path = Pth('Breakfast/annotation/unit_actions_list.pkl')
    gt_actions_path = Pth(
        'Breakfast/annotation/gt_unit_actions_timestamped.pkl')

    (video_ids_tr, video_ids_te) = utils.pkl_load(video_ids_path)
    unit_actions = utils.pkl_load(unit_actions_path)

    video_pathes_tr = [
        '%s/Breakfast/videos/%s' % (
            root_path,
            __video_video_id_to_video_relative_path(id, False),
        ) for id in video_ids_tr
    ]
    video_pathes_te = [
        '%s/Breakfast/videos/%s' % (
            root_path,
            __video_video_id_to_video_relative_path(id, False),
        ) for id in video_ids_te
    ]

    gt_actions_te = __get_gt_actions_timestamped(video_pathes_te, unit_actions)
    gt_actions_tr = __get_gt_actions_timestamped(video_pathes_tr, unit_actions)

    gt_actions_tr = np.array(gt_actions_tr)
    gt_actions_te = np.array(gt_actions_te)

    l_tr = [len(i) for i in gt_actions_tr]
    l_te = [len(i) for i in gt_actions_te]
    print('mean, std, min, max for number of nodes in each video [tr/te]')
    print(np.mean(l_tr), np.std(l_tr), np.min(l_tr), np.max(l_tr))
    print(np.mean(l_te), np.std(l_te), np.min(l_te), np.max(l_te))

    print(gt_actions_tr.shape)
    print(gt_actions_te.shape)

    utils.pkl_dump(
        ((video_ids_tr, gt_actions_tr), (video_ids_te, gt_actions_te)),
        gt_actions_path)
    def __init__(self, features_root_path, n_timesteps, n_timesteps_total, is_random_tr, is_random_te):

        gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
        (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(gt_activities_path)

        self.__feature_root_path = features_root_path
        self.__n_timesteps_total = n_timesteps_total
        self.__n_timesteps = n_timesteps
        self.__is_random_tr = is_random_tr
        self.__is_random_te = is_random_te
    def __init__(self, n_timesteps, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False):
        """
        :param n_timesteps:  How many timesteps per video.
        :param is_random_tr: Sample random or uniform frames.
        :param is_random_te: Sample random or uniform frames.
        :param is_shuffle_tr: To shuffle data or not.
        :param is_shuffle_te: To shuffle data or not.
        """

        self.__is_random_tr = is_random_tr
        self.__is_random_te = is_random_te
        self.__is_shuffle_tr = is_shuffle_tr
        self.__is_shuffle_te = is_shuffle_te
        self.__n_timesteps = n_timesteps

        gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
        frames_dict_path = Pth('Breakfast/annotation/frames_dict.pkl')

        (self.__video_frames_dict_tr, self.__video_frames_dict_te) = utils.pkl_load(frames_dict_path)
        (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(gt_activities_path)
def _202_spit_video_frames_relative_pathes():
    video_names_splits_path = Pth('EPIC-Kitchens/annotation/video_names_splits.pkl')
    frame_relative_pathes_dict_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict.pkl')
    frame_relative_pathes_dict_tr_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict_te.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frames_dict = utils.pkl_load(frame_relative_pathes_dict_path)

    dict_tr = dict()
    dict_te = dict()

    for v_name in video_names_tr:
        dict_tr[v_name] = frames_dict[v_name]

    for v_name in video_names_te:
        dict_te[v_name] = frames_dict[v_name]

    utils.pkl_dump(dict_tr, frame_relative_pathes_dict_tr_path)
    utils.pkl_dump(dict_te, frame_relative_pathes_dict_te_path)
    def __read_features(self, params):

        idx = params[0]
        path = params[1]

        # load feature
        print(path)
        feature = utils.pkl_load(path)

        # add current feature to the list
        self.__features[idx] = feature
Example #20
    def __init__(self, batch_size, n_classes, feature_dim, feature_name, is_training, is_shuffle=True):
        """
        Initialization
        """

        self.batch_size = batch_size  # 32
        self.is_training = is_training  # True
        self.n_classes = n_classes  # 157
        self.feature_dim = feature_dim  # (1024, 32, 7, 7)
        self.feature_name = feature_name  # 'features_i3d_pytorch_charades_rgb_mixed_5c_32f'
        self.is_shuffle = is_shuffle
        self.dataset_name = 'Charades'

        # load annotation
        root_path = './data/Charades'
        annotation_path = '%s/annotation/video_annotation.pkl' % (root_path)  # video annotation: ./data/Charades/annotation/video_annotation.pkl

        if self.is_training:
            (video_names, y, _, _) = utils.pkl_load(annotation_path) #video_names [b'001YG' b'004QE' b'00HFP' ... b'ZZDBH' b'ZZN85' b'ZZXQF'],y.shape=(7811, 157)
        else:
            (_, _, video_names, y) = utils.pkl_load(annotation_path) #y.shape = (1814,157)

        # in case of single-label classification, debinarize the labels
        if config.cfg.MODEL.CLASSIFICATION_TYPE == 'sl':
            y = utils.debinarize_label(y)

        # in any case, make sure target is float
        y = y.astype(np.float32)

        # convert relative paths to root paths (where the I3D-extracted features are stored)
        feats_path = np.array(['%s/%s/%s.pkl' % (root_path, feature_name, p.astype(str)) for p in video_names])  # original: './data/Charades/features_i3d_pytorch_charades_rgb_mixed_5c_32f/'

        n_samples = len(y)
        self.n_samples = n_samples
        self.n_batches = utils.calc_num_batches(n_samples, batch_size)  # number of batches
        self.feats_path = feats_path  # where the features are stored
        self.y = y

        # shuffle the data
        if self.is_shuffle:
            self.__shuffle()
Example #21
def _401_pickle_features_i3d_mixed_5c():
    n_frames_per_video = 512
    features_root_path = Pth('Breakfast/features_i3d_mixed_5c_%d_frames',
                             (n_frames_per_video, ))
    features_path = Pth(
        'Breakfast/features/features_i3d_mixed_5c_%d_frames.h5',
        (n_frames_per_video, ))
    video_ids_path = Pth('Breakfast/annotation/video_ids_split.pkl')

    (video_ids_tr, video_ids_te) = utils.pkl_load(video_ids_path)

    n_tr = len(video_ids_tr)
    n_te = len(video_ids_te)

    n_frames_per_segment = 8
    n_segments = int(n_frames_per_video / n_frames_per_segment)
    assert n_segments * n_frames_per_segment == n_frames_per_video

    f_tr = np.zeros((n_tr, n_segments, 7, 7, 1024), dtype=np.float16)
    f_te = np.zeros((n_te, n_segments, 7, 7, 1024), dtype=np.float16)

    for i in range(n_tr):
        utils.print_counter(i, n_tr, 100)
        p = '%s/%s.pkl' % (features_root_path, video_ids_tr[i])
        f = utils.pkl_load(p)  # (T, 7, 7, 1024)
        f_tr[i] = f

    for i in range(n_te):
        utils.print_counter(i, n_te, 100)
        p = '%s/%s.pkl' % (features_root_path, video_ids_te[i])
        f = utils.pkl_load(p)  # (T, 7, 7, 1024)
        f_te[i] = f

    print(f_tr.shape)
    print(f_te.shape)

    print(utils.get_size_in_gb(utils.get_array_memory_size(f_tr)))
    print(utils.get_size_in_gb(utils.get_array_memory_size(f_te)))

    data_names = ['x_tr', 'x_te']
    utils.h5_dump_multi((f_tr, f_te), data_names, features_path)
    def __init__(self, features_path, n_timesteps, n_timesteps_total, is_random_tr=True, is_random_te=False, dataset_type=None):

        if dataset_type == const.DATASET_TYPES.breakfast:
            gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
            (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
        elif dataset_type == const.DATASET_TYPES.charades:
            gt_activities_path = Pth('Charades/annotation/video_annotation.pkl')
            (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
            self.__y_tr = self.__y_tr.astype(np.float32)
            self.__y_te = self.__y_te.astype(np.float32)
        else:
            raise Exception('Unknown Dataset Type: %s' % (dataset_type))

        (self.__x_tr, self.__x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

        self.__feature_root_path = features_path
        self.__n_timesteps_total = n_timesteps_total
        self.__n_timesteps = n_timesteps

        self.__is_random_tr = is_random_tr
        self.__is_random_te = is_random_te
    def __init__(self, features_path, n_timesteps, n_timesteps_total, dataset_type=None):

        if dataset_type == const.DATASET_TYPES.breakfast:
            gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
            (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
        elif dataset_type == const.DATASET_TYPES.charades:
            gt_activities_path = Pth('Charades/annotation/video_annotation.pkl')
            (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
            self.__y_tr = self.__y_tr.astype(np.float32)
            self.__y_te = self.__y_te.astype(np.float32)
        else:
            raise Exception('Unknown Dataset Type: %s' % (dataset_type))

        (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
        step = n_timesteps_total / float(n_timesteps)
        idxes = np.arange(0, n_timesteps_total, step, dtype=np.float32).astype(np.int32)
        x_tr = x_tr[:, :, idxes]
        x_te = x_te[:, :, idxes]

        self.__x_tr = x_tr.astype(np.float32)
        self.__x_te = x_te.astype(np.float32)
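The index arithmetic above uniformly subsamples the stored timesteps; a tiny worked example (the values 64 and 16 are illustrative):

import numpy as np

n_timesteps_total, n_timesteps = 64, 16
step = n_timesteps_total / float(n_timesteps)  # 4.0
idxes = np.arange(0, n_timesteps_total, step, dtype=np.float32).astype(np.int32)
print(idxes)  # [ 0  4  8 ... 56 60]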
Example #24
def _06_prepare_video_annotation_multi_label():
    root_path = '.'
    video_annotation_path = '%s/Charades/annotation/video_annotation.pkl' % (
        root_path)
    video_annotation_multi_label_path = '%s/Charades/annotation/video_annotation_multi_label.pkl' % (
        root_path)

    (video_id_tr, y_tr, video_id_te,
     y_te) = utils.pkl_load(video_annotation_path)

    video_ids_tr = np.unique(video_id_tr)
    video_ids_te = np.unique(video_id_te)

    n_tr = len(video_ids_tr)
    n_te = len(video_ids_te)
    n_classes = N_CLASSES

    video_gt_dict_tr = dict()
    video_gt_dict_te = dict()

    for id in video_ids_tr:
        video_gt_dict_tr[id] = []

    for id in video_ids_te:
        video_gt_dict_te[id] = []
    """
    zip() 函数用于将可迭代的对象作为参数,将对象中对应的元素打包成一个个元组,然后返回由这些元组组成的列表。

    如果各个迭代器的元素个数不一致,则返回列表长度与最短的对象相同,利用 * 号操作符,可以将元组解压为列表。
    """
    for i, j in zip(video_id_tr, y_tr):
        video_gt_dict_tr[i].append(j)

    for i, j in zip(video_id_te, y_te):
        video_gt_dict_te[i].append(j)

    # binarize labels of videos
    y_multi_label_tr = np.zeros((n_tr, n_classes), dtype=np.int32)
    y_multi_label_te = np.zeros((n_te, n_classes), dtype=np.int32)

    for idx_video, video_name in enumerate(video_ids_tr):
        idx_class = np.add(video_gt_dict_tr[video_name], -1)
        y_multi_label_tr[idx_video][idx_class] = 1

    for idx_video, video_name in enumerate(video_ids_te):
        idx_class = np.add(video_gt_dict_te[video_name], -1)
        y_multi_label_te[idx_video][idx_class] = 1

    data = (video_ids_tr, y_multi_label_tr, video_ids_te, y_multi_label_te)
    utils.pkl_dump(data, video_annotation_multi_label_path)
Example #25
    def __getitem__(self, index):
        """
        Generate one batch of data
        """

        y = self.y[index]
        p = self.feats_path[index]
        x = utils.pkl_load(p)  # (T, H, W, C)

        # convert to channel-first
        x = np.transpose(x, (3, 0, 1, 2))  # (C, T, H, W)

        return x, y
def __get_action_names_from_action_keys(action_keys):
    root_path = c.DATA_ROOT_PATH
    verb_dict_path = '%s/EPIC-Kitchens/annotation/EPIC_verb_classes_dict.pkl' % (root_path)
    noun_dict_path = '%s/EPIC-Kitchens/annotation/EPIC_noun_classes_dict.pkl' % (root_path)

    verb_dict = utils.pkl_load(verb_dict_path)
    noun_dict = utils.pkl_load(noun_dict_path)

    action_names = []
    for key in action_keys:
        verb_idx, noun_idx = key.split('_')
        verb_idx = int(verb_idx)
        noun_idx = int(noun_idx)
        verb_name = verb_dict[verb_idx][0]
        noun_name = noun_dict[noun_idx][0]
        action_name = '%s_%s' % (verb_name, noun_name)
        action_names.append(action_name)

    action_names = np.array(action_names)
    return action_names

# endregion
    def __init__(self, img_root_path, is_shuffle_tr=True, is_shuffle_te=False):
        annot_path = Pth('Hico/annotation/anno_hico.pkl')

        (self.img_names_tr, self.y_tr, self.img_names_te, self.y_te) = utils.pkl_load(annot_path)

        self.y_tr = self.y_tr.astype(np.float32)
        self.y_te = self.y_te.astype(np.float32)

        self.is_shuffle_tr = is_shuffle_tr
        self.is_shuffle_te = is_shuffle_te

        self.img_names_tr = np.array(['%s/%s' % (img_root_path, n) for n in self.img_names_tr])
        self.img_names_te = np.array(['%s/%s' % (img_root_path, n) for n in self.img_names_te])
Example #28
def _03_mean_std_of_nodes():
    sns.set_style('whitegrid')
    sns.set(style='darkgrid')  # white, dark, whitegrid, darkgrid, ticks

    n_epochs = 100
    node_dim = 1024
    n_centroids = 128

    model_name = 'classifier_19.02.21-01:00:30'
    nodes_root_path = Pth('Breakfast/qualitative_results/node_embedding_%s' % (model_name,))

    # load nodes from files
    nodes = []
    nodes_file_pathes = utils.file_pathes(nodes_root_path, is_nat_sort=True)
    for i in range(n_epochs):
        n = utils.pkl_load(nodes_file_pathes[i])
        nodes.append(n)
    nodes = np.array(nodes)  # (n_epochs, 128, 1024)

    distances = []
    for i in range(n_epochs):
        n = nodes[i]
        n = utils.normalize_l1(n)
        d = distance.cdist(n, n, metric='euclidean')
        d = np.mean(d)
        distances.append(d)

    distances = np.array(distances)

    fig, ax = plt.subplots(nrows=1, ncols=1, num=1, figsize=(4, 2))
    colors = plot_utils.tableau_category10()
    ax.set_title('')

    y = distances
    x = np.arange(1, n_epochs + 1)

    fit_fn = np.poly1d(np.polyfit(x, y, 4))
    y_fit = fit_fn(x)
    sigma = 0.005

    plt.fill_between(x, y_fit + sigma, y_fit - sigma, facecolor=colors[0], alpha=0.25)
    ax.plot(x, y, '.', c=colors[0], markersize=9, alpha=1.0)
    ax.plot(x, y_fit, color='black', lw=1)

    plt.tight_layout()
    plt.subplots_adjust(left=0.2, right=0.9, top=0.9, bottom=0.3)
    plt.grid(False)
    plt.xlabel('Epoch Number')
    plt.ylabel('Distance')
    plt.show()
Example #29
def _02_plot_nodes_over_epochs():
    sns.set_style('whitegrid')
    sns.set(style='darkgrid')

    n_epochs = 50
    node_dim = 1024
    n_centroids = 128

    # for plotting
    is_async_tsne = True
    window_size = 15
    n_max_centroids = 40

    model_name = 'classifier_19.02.21-01:00:30'
    nodes_root_path = Pth('Breakfast/qualitative_results/node_embedding_%s' % (model_name,))

    # load nodes from files
    nodes = []
    nodes_file_pathes = utils.file_pathes(nodes_root_path, is_nat_sort=True)
    for i in range(n_epochs):
        n = utils.pkl_load(nodes_file_pathes[i])
        nodes.append(n)
    nodes = np.array(nodes)  # (50, 128, 1024)
    nodes = nodes[:, 0:n_max_centroids]  # (50, 40, 1024)
    n_centroids = n_max_centroids
    print(nodes.shape)

    # embed the nodes
    nodes_1 = nodes[:window_size]
    nodes_2 = nodes[-window_size:]
    print(nodes_1.shape)
    print(nodes_2.shape)
    nodes_1 = np.reshape(nodes_1, (-1, node_dim))
    nodes_2 = np.reshape(nodes_2, (-1, node_dim))
    print(nodes_1.shape)
    print(nodes_2.shape)
    nodes_1 = __async_tsne_embedding(nodes_1) if is_async_tsne else utils.learn_manifold(c.MANIFOLD_TYPES[0], nodes_1)
    nodes_2 = __async_tsne_embedding(nodes_2) if is_async_tsne else utils.learn_manifold(c.MANIFOLD_TYPES[0], nodes_2)
    nodes_1 = np.reshape(nodes_1, (window_size, n_centroids, 2))  # (15, 40, 2)
    nodes_2 = np.reshape(nodes_2, (window_size, n_centroids, 2))  # (15, 40, 2)
    print(nodes_1.shape)
    print(nodes_2.shape)

    # colors = plot_utils.tableau_category20()
    colors = plot_utils.colors_256()
    colors_1 = colors[:n_centroids]
    colors_2 = colors[n_centroids + 1: n_centroids + n_centroids + 1]

    __plot_centroids(nodes_1, window_size, n_centroids, colors_1, 1)
    __plot_centroids(nodes_2, window_size, n_centroids, colors_2, 2)
    def __init__(self, n_timesteps, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False):
        """
        :param n_timesteps:  How many timesteps per video.
        :param is_random_tr: Sample random or uniform frames.
        :param is_random_te: Sample random or uniform frames.
        :param is_shuffle_tr: To shuffle data or not.
        :param is_shuffle_te: To shuffle data or not.
        """

        self.__is_random_tr = is_random_tr
        self.__is_random_te = is_random_te
        self.__is_shuffle_tr = is_shuffle_tr
        self.__is_shuffle_te = is_shuffle_te
        self.__n_timesteps = n_timesteps

        frames_dict_path = Pth('Charades/annotation/frames_dict_all_frames.pkl')
        annotation_path = Pth('Charades/annotation/video_annotation.pkl')

        (self.__video_frames_dict_tr, self.__video_frames_dict_te) = utils.pkl_load(frames_dict_path)
        (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(annotation_path)

        self.__y_tr = self.__y_tr.astype(np.float32)
        self.__y_te = self.__y_te.astype(np.float32)