def __init__(self, n_timesteps, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False, is_binarize_output=False):
    """
    :param n_timesteps: How many timesteps per video.
    :param is_random_tr: Sample random or uniform frames for training.
    :param is_random_te: Sample random or uniform frames for testing.
    :param is_shuffle_tr: Whether to shuffle the training data.
    :param is_shuffle_te: Whether to shuffle the test data.
    :param is_binarize_output: Whether to binarize (i.e. one-hot encode) the activity labels.
    """

    self.__is_random_tr = is_random_tr
    self.__is_random_te = is_random_te
    self.__is_shuffle_tr = is_shuffle_tr
    self.__is_shuffle_te = is_shuffle_te
    self.__n_timesteps = n_timesteps
    self.__n_frames_per_segment = 8
    self.__n_frames = self.__n_timesteps * self.__n_frames_per_segment

    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    frames_dict_path = Pth('Breakfast/annotation/frames_dict.pkl')

    (self.__video_frames_dict_tr, self.__video_frames_dict_te) = utils.pkl_load(frames_dict_path)
    (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(gt_activities_path)

    if is_binarize_output:
        classes = np.arange(0, 10)
        self.__y_tr = utils.binarize_label(self.__y_tr, classes)
        self.__y_te = utils.binarize_label(self.__y_te, classes)
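# A minimal sketch (assumption, NOT the project's utils.binarize_label) of the
# one-hot semantics the constructor above relies on: integer activity labels in
# np.arange(0, 10) become rows of a binary indicator matrix.
import numpy as np

def binarize_label_sketch(y, classes):
    out = np.zeros((len(y), len(classes)), dtype=np.float32)
    for row, label in zip(out, y):
        row[np.searchsorted(classes, label)] = 1.0
    return out

print(binarize_label_sketch(np.array([0, 3, 9]), np.arange(0, 10)))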
def _05_visualize_attention_values():
    # load data
    n_timesteps = 64
    n_centroids = 128
    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    frames_annot_path = Pth('Breakfast/annotation/annot_frames_i3d_%d.pkl', (512,))
    attention_values_path = Pth('Breakfast/qualitative_results/node_attention_%s.pkl', (model_name,))
    n_classes = ds_breakfast.N_CLASSES_ACTIVITIES

    frames_annot = utils.pkl_load(frames_annot_path)
    (video_ids_tr, y_tr, video_ids_te, y_te) = utils.pkl_load(gt_activities_path)
    y_tr = utils.debinarize_label(y_tr)
    y_te = utils.debinarize_label(y_te)
    (att_tr, att_te) = utils.pkl_load(attention_values_path)  # (1357, 64, 128), (355, 64, 128)

    # per-class attention: average over all videos of the class and over all timesteps
    attentions_tr = np.array([np.average(att_tr[np.where(y_tr == idx_class)[0]], axis=(0, 1)) for idx_class in range(n_classes)])  # (10, 128)
    attentions_te = np.array([np.average(att_te[np.where(y_te == idx_class)[0]], axis=(0, 1)) for idx_class in range(n_classes)])  # (10, 128)

    # remove least attended centroids
    all_attn_vals = np.mean(attentions_tr, axis=1)
def __init__(self, n_timesteps, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False):
    """
    :param n_timesteps: How many timesteps per video.
    :param is_random_tr: Sample random or uniform frames for training.
    :param is_random_te: Sample random or uniform frames for testing.
    :param is_shuffle_tr: Whether to shuffle the training data.
    :param is_shuffle_te: Whether to shuffle the test data.
    """

    frames_dict_path = Pth('Charades/annotation/frames_dict_all_frames.pkl')
    annotation_path = Pth('Charades/annotation/video_annotation.pkl')

    self.__is_random_tr = is_random_tr
    self.__is_random_te = is_random_te
    self.__is_shuffle_tr = is_shuffle_tr
    self.__is_shuffle_te = is_shuffle_te
    self.__n_timesteps = n_timesteps
    self.__n_frames_per_segment = 8
    self.__n_frames = self.__n_timesteps * self.__n_frames_per_segment

    (self.__video_frames_dict_tr, self.__video_frames_dict_te) = utils.pkl_load(frames_dict_path)
    (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(annotation_path)

    self.current_train = None
    self.current_test = None
def _804_random_sample_frames_for_i3d_test_video_level_by_split(split_type='train'):
    assert split_type in ['train', 'test'], 'Sorry, unknown split type: %s' % (split_type)

    is_train = split_type == 'train'
    file_name_suffix = 'tr' if is_train else 'te'
    root_path_id = 0 if configs.is_local_machine() else 5
    frames_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train', root_type=c.ROOT_PATH_TYPES[root_path_id])
    frame_relative_pathes_dict_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_%s.pkl', (file_name_suffix,))
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    video_names = video_names_tr if is_train else video_names_te
    del video_names_tr
    del video_names_te

    frame_relative_pathes_dict = utils.pkl_load(frame_relative_pathes_dict_path)

    # loop on the videos and sample frames for i3d
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    sampled_frames = __random_sample_frames_per_video_for_i3d(video_names, frames_root_path, frame_relative_pathes_dict, n_frames_per_segment, n_frames_per_video)
    return sampled_frames
def _802_uniform_sample_frames_for_i3d_test_video_level():
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')
    frame_relative_pathes_dict_tr_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_te.pkl')
    sampled_frames_relative_pathes = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_uniform_sample.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frame_relative_pathes_dict_tr = utils.pkl_load(frame_relative_pathes_dict_tr_path)
    frame_relative_pathes_dict_te = utils.pkl_load(frame_relative_pathes_dict_te_path)

    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    sampled_frames_tr = __uniform_sample_frames_per_video_for_i3d(video_names_tr, frame_relative_pathes_dict_tr, n_frames_per_segment, n_frames_per_video)
    sampled_frames_te = __uniform_sample_frames_per_video_for_i3d(video_names_te, frame_relative_pathes_dict_te, n_frames_per_segment, n_frames_per_video)

    data = (sampled_frames_tr, sampled_frames_te)
    utils.pkl_dump(data, sampled_frames_relative_pathes)
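# Hedged sketch (assumption, NOT the project's __uniform_sample_frames_per_video_for_i3d):
# one plausible way to sample uniformly, placing n_frames_per_video / n_frames_per_segment
# segment starts evenly over a video and taking n_frames_per_segment consecutive
# frames per segment, as I3D consumes snippets of consecutive frames.
import numpy as np

def uniform_sample_segments_sketch(frame_names, n_frames_per_segment, n_frames_per_video):
    n_segments = n_frames_per_video // n_frames_per_segment
    starts = np.linspace(0, len(frame_names) - n_frames_per_segment, n_segments).astype(int)
    idx = np.concatenate([np.arange(s, s + n_frames_per_segment) for s in starts])
    return [frame_names[i] for i in idx]

print(uniform_sample_segments_sketch(['f%04d.jpg' % i for i in range(1000)], 8, 512)[:8])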
def _803_random_sample_frames_for_i3d_test_video_level():
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')
    frame_relative_pathes_dict_tr_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_te.pkl')
    root_path_id = 0 if configs.is_local_machine() else 5
    frames_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train', root_type=c.ROOT_PATH_TYPES[root_path_id])

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frame_relative_pathes_dict_tr = utils.pkl_load(frame_relative_pathes_dict_tr_path)
    frame_relative_pathes_dict_te = utils.pkl_load(frame_relative_pathes_dict_te_path)

    # loop on the videos and sample frames for i3d
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    sampled_frames_tr = __random_sample_frames_per_video_for_i3d(video_names_tr, frames_root_path, frame_relative_pathes_dict_tr, n_frames_per_segment, n_frames_per_video)
    sampled_frames_te = __random_sample_frames_per_video_for_i3d(video_names_te, frames_root_path, frame_relative_pathes_dict_te, n_frames_per_segment, n_frames_per_video)

    return (sampled_frames_tr, sampled_frames_te)
def __init__(self, n_timesteps, n_timesteps_total, featurenet_type, x_heavy_path, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False):
    """
    :param n_timesteps: How many timesteps per video to sample.
    :param n_timesteps_total: How many timesteps per video are available in total.
    :param featurenet_type: Which feature backbone was used (determines frames per segment).
    :param x_heavy_path: Path to the pre-extracted heavyweight features.
    :param is_random_tr: Sample random or uniform frames for training.
    :param is_random_te: Sample random or uniform frames for testing.
    :param is_shuffle_tr: Whether to shuffle the training data.
    :param is_shuffle_te: Whether to shuffle the test data.
    """

    self.__is_random_tr = is_random_tr
    self.__is_random_te = is_random_te
    self.__is_shuffle_tr = is_shuffle_tr
    self.__is_shuffle_te = is_shuffle_te
    self.__n_timesteps = n_timesteps
    self.__n_timesteps_total = n_timesteps_total

    n_frames_per_segment = utils.get_model_n_frames_per_segment(featurenet_type)
    n_frames = n_timesteps_total * n_frames_per_segment

    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    frames_annot_path = Pth('Breakfast/annotation/annot_frames_%s_%d.pkl', (featurenet_type, n_frames,))

    (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(gt_activities_path)
    (x_heavy_tr, x_heavy_te) = utils.h5_load_multi(x_heavy_path, ['x_tr', 'x_te'])  # (B, C, T, H, W)
    self.__x_heavy_tr = x_heavy_tr
    self.__x_heavy_te = x_heavy_te

    # select the middle frame from each snippet
    (frames_dict_tr, frames_dict_te) = utils.pkl_load(frames_annot_path)
    frames_dict_tr = self.__select_middle_frame(frames_dict_tr, n_frames_per_segment)
    frames_dict_te = self.__select_middle_frame(frames_dict_te, n_frames_per_segment)
    self.__frames_dict_tr = frames_dict_tr
    self.__frames_dict_te = frames_dict_te
def _703_prepare_data_splits():
    """
    Split the videos into train/test, person by person, so each person contributes to both splits.
    :return:
    """

    annot_dict_path = Pth('EPIC-Kitchens/annotations/EPIC_train_action_labels_dict.pkl')
    annot_idxes_many_shots_path = Pth('EPIC-Kitchens/annotations/annot_idxes_many_shots_noun_verb.pkl')
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')

    annot_idxes_many_shots = utils.pkl_load(annot_idxes_many_shots_path)
    annot_dict = utils.pkl_load(annot_dict_path)

    # ratio of train videos to all videos, per person
    split_ratio = 0.8
    person_videos_dict = {}

    # first loop to collect all unique video ids per person
    for annot_id in annot_idxes_many_shots:
        annot_line = annot_dict[annot_id]
        person_id = annot_line[0]
        video_id = annot_line[1]
        if person_id not in person_videos_dict:
            person_videos_dict[person_id] = []
        person_videos_dict[person_id].append(video_id)

    for person_id in person_videos_dict:
        video_names = natsort.natsorted(np.unique(person_videos_dict[person_id]))
        person_videos_dict[person_id] = video_names

    # now that we have collected the persons and their videos, see how many videos each split gets
    video_names_tr = []
    video_names_te = []
    for person_id in person_videos_dict:
        v_names = person_videos_dict[person_id]
        idx = int(len(v_names) * split_ratio)
        v_names_tr = v_names[:idx]
        v_names_te = v_names[idx:]
        video_names_tr += v_names_tr
        video_names_te += v_names_te

    video_names_tr = np.array(video_names_tr)
    video_names_te = np.array(video_names_te)

    print(len(video_names_tr) + len(video_names_te))
    print(len(video_names_tr))
    print(len(video_names_te))

    # save video names
    utils.pkl_dump((video_names_tr, video_names_te), video_names_splits_path)
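# Self-contained example (assumption, with made-up video ids) of the per-person
# 80/20 split used above: each person's naturally-sorted videos are cut at 80%,
# so every person with enough videos contributes to both train and test.
split_ratio = 0.8
v_names = ['P01_01', 'P01_02', 'P01_03', 'P01_04', 'P01_05']
idx = int(len(v_names) * split_ratio)  # 4
print(v_names[:idx])  # ['P01_01', 'P01_02', 'P01_03', 'P01_04'] -> train
print(v_names[idx:])  # ['P01_05'] -> test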
def __read_features(self, params):
    features_path, idx_feature, idx_timestep = params
    frame_feature = utils.pkl_load(features_path)

    # store the feature at its (feature, timestep) position in the pre-allocated array
    self.__features[idx_feature, idx_timestep] = frame_feature
def __load_features(self, params):
    idx_video = params[0]
    feats_path = params[1]

    try:
        # load features from file
        feats = utils.pkl_load(feats_path)
        n_feats = len(feats)
        if self.__feat_map_side_dim == 1:
            feats = np.expand_dims(feats, 1)
            feats = np.expand_dims(feats, 1)

        # some videos have fewer frames than required; take all of their features
        # and zero-pad (or maybe repeat the frames?) until the requirement is satisfied
        if n_feats < self.__n_frames_per_video:
            feats = self.__pad_video_feats(feats)
        else:
            # randomly sample only n frames
            idx_feats = np.arange(n_feats)
            np.random.shuffle(idx_feats)
            idx_feats = idx_feats[:self.__n_frames_per_video]
            feats = feats[idx_feats]

        assert len(feats) == self.__n_frames_per_video

        # as float
        feats = feats.astype(np.float32)
        self.__batch_features[idx_video] = feats
    except Exception as exp:
        print('\nSorry, error in loading feature %s' % (feats_path))
        print(exp)
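# Minimal, runnable illustration (assumption, not the project's helper) of the
# random-sampling branch above: when a video has more frames than needed,
# exactly n_frames_per_video of them are drawn without replacement.
import numpy as np

n_frames_per_video = 4
feats = np.random.rand(10, 1, 1, 1024).astype(np.float32)  # 10 frame features
idx_feats = np.arange(len(feats))
np.random.shuffle(idx_feats)
feats = feats[idx_feats[:n_frames_per_video]]
assert feats.shape == (n_frames_per_video, 1, 1, 1024)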
def _800_prepare_video_frames_path_dict():
    frame_relative_pathes_dict_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict.pkl')
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')
    imgs_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    video_names = np.hstack((video_names_tr, video_names_te))

    frame_relative_pathes_dict = {}
    n_videos = len(video_names)
    for idx, video_id in enumerate(video_names):
        utils.print_counter(idx, n_videos)

        person_id = video_id.split('_')[0]
        video_frames_root_path = '%s/%s/%s' % (imgs_root_path, person_id, video_id)
        video_frames_names = utils.file_names(video_frames_root_path, is_nat_sort=True)
        video_frames_names = np.array(video_frames_names)
        video_frames_relative_pathes = np.array(['%s/%s/%s' % (person_id, video_id, n) for n in video_frames_names])
        frame_relative_pathes_dict[video_id] = video_frames_relative_pathes

    utils.pkl_dump(frame_relative_pathes_dict, frame_relative_pathes_dict_path)
def __count_how_many_videos_per_class():
    root_path = c.data_root_path
    annotation_path = '%s/Charades/annotation/video_annotation.pkl' % (root_path)
    (video_id_tr, y_tr, video_id_te, y_te) = utils.pkl_load(annotation_path)

    n_classes = N_CLASSES
    counts_tr = []
    counts_te = []

    # labels are 1-indexed, hence the (i + 1)
    for i in range(n_classes):
        counts_tr.append(len(np.where(y_tr == i + 1)[0]))
        counts_te.append(len(np.where(y_te == i + 1)[0]))

    counts_tr = np.array(counts_tr)
    counts_te = np.array(counts_te)

    # sort classes by descending training count
    idx = np.argsort(counts_tr)[::-1]
    counts_tr = counts_tr[idx]
    counts_te = counts_te[idx]

    counts = np.array([counts_tr, counts_te])
    print(counts_tr)
    print(counts_te)
    utils.plot_multi(counts, title='Counts')
def _704_prepare_many_shots_noun_verb_action_ids():
    """
    Prepare dicts for nouns, verbs and actions that convert from the original id to the many-shot id.
    All ids are zero-indexed.
    71 noun classes
    26 verb classes
    xx actions
    :return:
    """

    annot_dict_path = Pth('EPIC-Kitchens/annotations/EPIC_train_action_labels_dict.pkl')
    annot_idxes_many_shots_path = Pth('EPIC-Kitchens/annotations/annot_idxes_many_shots_noun_verb.pkl')
    noun_ids_many_shots_dict_path = Pth('EPIC-Kitchens/annotations/noun_ids_many_shots_dict.pkl')
    verb_ids_many_shots_dict_path = Pth('EPIC-Kitchens/annotations/verb_ids_many_shots_dict.pkl')
    actn_ids_many_shots_dict_path = Pth('EPIC-Kitchens/annotations/actn_ids_many_shots_dict.pkl')
    actn_ids_many_shots_list_path = Pth('EPIC-Kitchens/annotations/EPIC_many_shot_actions.csv')

    annot_idxes_many_shots = utils.pkl_load(annot_idxes_many_shots_path)
    annot_dict = utils.pkl_load(annot_dict_path)

    # get all noun_ids, verb_ids and action_ids
    noun_ids = [annot_dict[annot_id][10] for annot_id in annot_idxes_many_shots]
    verb_ids = [annot_dict[annot_id][8] for annot_id in annot_idxes_many_shots]
    actn_ids = __get_action_ids_from_annotation(actn_ids_many_shots_list_path)

    noun_ids = np.sort(np.unique(noun_ids))
    verb_ids = np.sort(np.unique(verb_ids))

    n_nouns = len(noun_ids)
    n_verbs = len(verb_ids)
    n_actns = len(actn_ids)

    # these dictionaries map the original (noun, verb or action) id to its many-shot id
    many_shot_noun_ids_dict = dict(zip(noun_ids, np.arange(n_nouns)))
    many_shot_verb_ids_dict = dict(zip(verb_ids, np.arange(n_verbs)))
    many_shot_actn_ids_dict = dict(zip(actn_ids, np.arange(n_actns)))

    utils.pkl_dump(many_shot_noun_ids_dict, noun_ids_many_shots_dict_path)
    utils.pkl_dump(many_shot_verb_ids_dict, verb_ids_many_shots_dict_path)
    utils.pkl_dump(many_shot_actn_ids_dict, actn_ids_many_shots_dict_path)
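# Tiny worked example (assumption, with made-up ids) of the re-mapping above:
# sorted unique original ids become contiguous, zero-indexed many-shot ids,
# i.e. this maps 3 -> 0, 7 -> 1, 12 -> 2.
import numpy as np

noun_ids = np.sort(np.unique([12, 3, 7, 7]))  # array([ 3,  7, 12])
many_shot_noun_ids = dict(zip(noun_ids, np.arange(len(noun_ids))))
print(many_shot_noun_ids)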
def __init__(self, batch_size, n_classes, feature_dim, feature_name, is_training, is_shuffle=True):
    """
    Initialization
    """
    self.batch_size = batch_size
    self.is_training = is_training
    self.n_classes = n_classes
    self.feature_dim = feature_dim
    self.feature_name = feature_name
    # self.is_shuffle = is_shuffle
    self.dataset_name = 'charades'

    # load annotation
    root_path = './data/charades'
    annotation_path = '%s/annotation/video_annotation_py3.pkl' % (root_path)

    if self.is_training:
        (video_names, y, _, _) = utils.pkl_load(annotation_path)
        print(video_names)
    else:
        (_, _, video_names, y) = utils.pkl_load(annotation_path)

    # in case of single label classification, debinarize the labels
    if config.cfg.MODEL.CLASSIFICATION_TYPE == 'sl':
        y = utils.debinarize_label(y)

    # in any case, make sure the target is float
    y = y.astype(np.float32)

    # convert relative pathes to absolute ones, rooted at root_path
    feats_path = np.array(['%s/%s/%s.pkl' % (root_path, feature_name, p) for p in video_names])

    n_samples = len(y)
    self.n_samples = n_samples
    self.n_batches = utils.calc_num_batches(n_samples, batch_size)
    self.feats_path = feats_path
    self.y = y
def _105_prepare_action_gt_timestamped():
    """
    Get ground truth of unit-actions with their timestamps.
    :return:
    """
    root_path = c.DATA_ROOT_PATH

    video_ids_path = Pth('Breakfast/annotation/video_ids_split.pkl')
    unit_actions_path = Pth('Breakfast/annotation/unit_actions_list.pkl')
    gt_actions_path = Pth('Breakfast/annotation/gt_unit_actions_timestamped.pkl')

    (video_ids_tr, video_ids_te) = utils.pkl_load(video_ids_path)
    unit_actions = utils.pkl_load(unit_actions_path)

    video_pathes_tr = ['%s/Breakfast/videos/%s' % (root_path, __video_video_id_to_video_relative_path(id, False)) for id in video_ids_tr]
    video_pathes_te = ['%s/Breakfast/videos/%s' % (root_path, __video_video_id_to_video_relative_path(id, False)) for id in video_ids_te]

    gt_actions_te = __get_gt_actions_timestamped(video_pathes_te, unit_actions)
    gt_actions_tr = __get_gt_actions_timestamped(video_pathes_tr, unit_actions)

    gt_actions_tr = np.array(gt_actions_tr)
    gt_actions_te = np.array(gt_actions_te)

    l_tr = [len(i) for i in gt_actions_tr]
    l_te = [len(i) for i in gt_actions_te]
    print('mean, std, min, max for number of nodes in each video [tr/te]')
    print(np.mean(l_tr), np.std(l_tr), np.min(l_tr), np.max(l_tr))
    print(np.mean(l_te), np.std(l_te), np.min(l_te), np.max(l_te))

    print(gt_actions_tr.shape)
    print(gt_actions_te.shape)

    utils.pkl_dump(((video_ids_tr, gt_actions_tr), (video_ids_te, gt_actions_te)), gt_actions_path)
def __init__(self, features_root_path, n_timesteps, n_timesteps_total, is_random_tr, is_random_te):
    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(gt_activities_path)

    self.__feature_root_path = features_root_path
    self.__n_timesteps_total = n_timesteps_total
    self.__n_timesteps = n_timesteps
    self.__is_random_tr = is_random_tr
    self.__is_random_te = is_random_te
def __init__(self, n_timesteps, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False):
    """
    :param n_timesteps: How many timesteps per video.
    :param is_random_tr: Sample random or uniform frames for training.
    :param is_random_te: Sample random or uniform frames for testing.
    :param is_shuffle_tr: Whether to shuffle the training data.
    :param is_shuffle_te: Whether to shuffle the test data.
    """

    self.__is_random_tr = is_random_tr
    self.__is_random_te = is_random_te
    self.__is_shuffle_tr = is_shuffle_tr
    self.__is_shuffle_te = is_shuffle_te
    self.__n_timesteps = n_timesteps

    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    frames_dict_path = Pth('Breakfast/annotation/frames_dict.pkl')

    (self.__video_frames_dict_tr, self.__video_frames_dict_te) = utils.pkl_load(frames_dict_path)
    (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(gt_activities_path)
def _202_split_video_frames_relative_pathes():
    video_names_splits_path = Pth('EPIC-Kitchens/annotation/video_names_splits.pkl')
    frame_relative_pathes_dict_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict.pkl')
    frame_relative_pathes_dict_tr_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict_te.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frames_dict = utils.pkl_load(frame_relative_pathes_dict_path)

    dict_tr = dict()
    dict_te = dict()

    for v_name in video_names_tr:
        dict_tr[v_name] = frames_dict[v_name]

    for v_name in video_names_te:
        dict_te[v_name] = frames_dict[v_name]

    utils.pkl_dump(dict_tr, frame_relative_pathes_dict_tr_path)
    utils.pkl_dump(dict_te, frame_relative_pathes_dict_te_path)
def __read_features(self, params):
    idx = params[0]
    path = params[1]

    # load feature
    print(path)
    feature = utils.pkl_load(path)

    # store the feature at its position in the pre-allocated array
    self.__features[idx] = feature
def __init__(self, batch_size, n_classes, feature_dim, feature_name, is_training, is_shuffle=True):
    """
    Initialization
    """
    self.batch_size = batch_size  # 32
    self.is_training = is_training  # True
    self.n_classes = n_classes  # 157
    self.feature_dim = feature_dim  # (1024, 32, 7, 7)
    self.feature_name = feature_name  # 'features_i3d_pytorch_charades_rgb_mixed_5c_32f'
    self.is_shuffle = is_shuffle
    self.dataset_name = 'Charades'

    # load the video annotation: ./data/Charades/annotation/video_annotation.pkl
    root_path = './data/Charades'
    annotation_path = '%s/annotation/video_annotation.pkl' % (root_path)

    if self.is_training:
        # video_names [b'001YG' b'004QE' b'00HFP' ... b'ZZDBH' b'ZZN85' b'ZZXQF'], y.shape = (7811, 157)
        (video_names, y, _, _) = utils.pkl_load(annotation_path)
    else:
        # y.shape = (1814, 157)
        (_, _, video_names, y) = utils.pkl_load(annotation_path)

    # in case of single label classification, debinarize the labels
    if config.cfg.MODEL.CLASSIFICATION_TYPE == 'sl':
        y = utils.debinarize_label(y)

    # in any case, make sure the target is float
    y = y.astype(np.float32)

    # convert relative pathes to absolute ones; this is where the features extracted
    # by I3D are stored, e.g. './data/Charades/features_i3d_pytorch_charades_rgb_mixed_5c_32f/'
    feats_path = np.array(['%s/%s/%s.pkl' % (root_path, feature_name, p.astype(str)) for p in video_names])

    n_samples = len(y)
    self.n_samples = n_samples
    self.n_batches = utils.calc_num_batches(n_samples, batch_size)  # number of batches
    self.feats_path = feats_path  # pathes of the feature files
    self.y = y

    # shuffle the data
    if self.is_shuffle:
        self.__shuffle()
def _401_pickle_features_i3d_mixed_5c():
    n_frames_per_video = 512
    features_root_path = Pth('Breakfast/features_i3d_mixed_5c_%d_frames', (n_frames_per_video,))
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_frames_per_video,))
    video_ids_path = Pth('Breakfast/annotation/video_ids_split.pkl')

    (video_ids_tr, video_ids_te) = utils.pkl_load(video_ids_path)
    n_tr = len(video_ids_tr)
    n_te = len(video_ids_te)

    n_frames_per_segment = 8
    n_segments = int(n_frames_per_video / n_frames_per_segment)
    assert n_segments * n_frames_per_segment == n_frames_per_video

    f_tr = np.zeros((n_tr, n_segments, 7, 7, 1024), dtype=np.float16)
    f_te = np.zeros((n_te, n_segments, 7, 7, 1024), dtype=np.float16)

    for i in range(n_tr):
        utils.print_counter(i, n_tr, 100)
        p = '%s/%s.pkl' % (features_root_path, video_ids_tr[i])
        f = utils.pkl_load(p)  # (T, 7, 7, 1024)
        f_tr[i] = f

    for i in range(n_te):
        utils.print_counter(i, n_te, 100)
        p = '%s/%s.pkl' % (features_root_path, video_ids_te[i])
        f = utils.pkl_load(p)  # (T, 7, 7, 1024)
        f_te[i] = f

    print(f_tr.shape)
    print(f_te.shape)

    print(utils.get_size_in_gb(utils.get_array_memory_size(f_tr)))
    print(utils.get_size_in_gb(utils.get_array_memory_size(f_te)))

    data_names = ['x_tr', 'x_te']
    utils.h5_dump_multi((f_tr, f_te), data_names, features_path)
def __init__(self, features_path, n_timesteps, n_timesteps_total, is_random_tr=True, is_random_te=False, dataset_type=None):
    if dataset_type == const.DATASET_TYPES.breakfast:
        gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
        (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
    elif dataset_type == const.DATASET_TYPES.charades:
        gt_activities_path = Pth('Charades/annotation/video_annotation.pkl')
        (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
        self.__y_tr = self.__y_tr.astype(np.float32)
        self.__y_te = self.__y_te.astype(np.float32)
    else:
        raise Exception('Unknown Dataset Type: %s' % (dataset_type))

    (self.__x_tr, self.__x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    self.__feature_root_path = features_path
    self.__n_timesteps_total = n_timesteps_total
    self.__n_timesteps = n_timesteps
    self.__is_random_tr = is_random_tr
    self.__is_random_te = is_random_te
def __init__(self, features_path, n_timesteps, n_timesteps_total, dataset_type=None):
    if dataset_type == const.DATASET_TYPES.breakfast:
        gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
        (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
    elif dataset_type == const.DATASET_TYPES.charades:
        gt_activities_path = Pth('Charades/annotation/video_annotation.pkl')
        (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
        self.__y_tr = self.__y_tr.astype(np.float32)
        self.__y_te = self.__y_te.astype(np.float32)
    else:
        raise Exception('Unknown Dataset Type: %s' % (dataset_type))

    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    # uniformly sub-sample n_timesteps out of n_timesteps_total along the temporal axis
    step = n_timesteps_total / float(n_timesteps)
    idxes = np.arange(0, n_timesteps_total, step, dtype=np.float32).astype(np.int32)
    x_tr = x_tr[:, :, idxes]
    x_te = x_te[:, :, idxes]

    self.__x_tr = x_tr.astype(np.float32)
    self.__x_te = x_te.astype(np.float32)
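# A minimal, self-contained check (assumption, not from the source) of the
# uniform sub-sampling indices computed above, e.g. picking 8 of 64 timesteps.
import numpy as np

n_timesteps_total, n_timesteps = 64, 8
step = n_timesteps_total / float(n_timesteps)  # 8.0
idxes = np.arange(0, n_timesteps_total, step, dtype=np.float32).astype(np.int32)
print(idxes)  # [ 0  8 16 24 32 40 48 56]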
def _06_prepare_video_annotation_multi_label():
    root_path = '.'
    video_annotation_path = '%s/Charades/annotation/video_annotation.pkl' % (root_path)
    video_annotation_multi_label_path = '%s/Charades/annotation/video_annotation_multi_label.pkl' % (root_path)

    (video_id_tr, y_tr, video_id_te, y_te) = utils.pkl_load(video_annotation_path)

    video_ids_tr = np.unique(video_id_tr)
    video_ids_te = np.unique(video_id_te)

    n_tr = len(video_ids_tr)
    n_te = len(video_ids_te)
    n_classes = N_CLASSES

    video_gt_dict_tr = dict()
    video_gt_dict_te = dict()

    for id in video_ids_tr:
        video_gt_dict_tr[id] = []

    for id in video_ids_te:
        video_gt_dict_te[id] = []

    # zip() aggregates the corresponding elements of its iterable arguments into tuples
    # and returns them as a list; if the iterables differ in length, the result is
    # truncated to the shortest one, and the * operator can be used to unzip.
    for i, j in zip(video_id_tr, y_tr):
        video_gt_dict_tr[i].append(j)

    for i, j in zip(video_id_te, y_te):
        video_gt_dict_te[i].append(j)

    # binarize labels of videos
    y_multi_label_tr = np.zeros((n_tr, n_classes), dtype=int)
    y_multi_label_te = np.zeros((n_te, n_classes), dtype=int)

    # labels are 1-indexed, so shift them down by 1 before setting the indicators
    for idx_video, video_name in enumerate(video_ids_tr):
        idx_class = np.add(video_gt_dict_tr[video_name], -1)
        y_multi_label_tr[idx_video][idx_class] = 1

    for idx_video, video_name in enumerate(video_ids_te):
        idx_class = np.add(video_gt_dict_te[video_name], -1)
        y_multi_label_te[idx_video][idx_class] = 1

    data = (video_ids_tr, y_multi_label_tr, video_ids_te, y_multi_label_te)
    utils.pkl_dump(data, video_annotation_multi_label_path)
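# Minimal sketch (assumption, with made-up labels) of the binarization step
# above: class labels are 1-indexed, so subtract 1 before setting indicators.
import numpy as np

n_classes = 5
labels = [1, 3, 3, 5]  # 1-indexed class ids for one video
y = np.zeros((n_classes,), dtype=int)
y[np.add(labels, -1)] = 1
print(y)  # [1 0 1 0 1]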
def __getitem__(self, index):
    """
    Generate one batch of data.
    """
    y = self.y[index]
    p = self.feats_path[index]
    x = utils.pkl_load(p)  # (T, H, W, C)

    # convert from channel-last (T, H, W, C) to channel-first (C, T, H, W)
    x = np.transpose(x, (3, 0, 1, 2))

    return x, y
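# Quick, self-contained check (assumption, with a dummy array) of the layout
# conversion in __getitem__ above: a (T, H, W, C) feature becomes (C, T, H, W).
import numpy as np

x = np.zeros((32, 7, 7, 1024), dtype=np.float32)  # (T, H, W, C)
x = np.transpose(x, (3, 0, 1, 2))
print(x.shape)  # (1024, 32, 7, 7), matching the feature_dim used elsewhere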
def __get_action_names_from_action_keys(action_keys):
    root_path = c.DATA_ROOT_PATH
    verb_dict_path = '%s/EPIC-Kitchens/annotation/EPIC_verb_classes_dict.pkl' % (root_path)
    noun_dict_path = '%s/EPIC-Kitchens/annotation/EPIC_noun_classes_dict.pkl' % (root_path)
    verb_dict = utils.pkl_load(verb_dict_path)
    noun_dict = utils.pkl_load(noun_dict_path)

    action_names = []
    for key in action_keys:
        # an action key is formatted as '<verb_idx>_<noun_idx>'
        verb_idx, noun_idx = key.split('_')
        verb_idx = int(verb_idx)
        noun_idx = int(noun_idx)
        verb_name = verb_dict[verb_idx][0]
        noun_name = noun_dict[noun_idx][0]
        action_name = '%s_%s' % (verb_name, noun_name)
        action_names.append(action_name)

    action_names = np.array(action_names)
    return action_names

# endregion
def __init__(self, img_root_path, is_shuffle_tr=True, is_shuffle_te=False):
    annot_path = Pth('Hico/annotation/anno_hico.pkl')
    (self.img_names_tr, self.y_tr, self.img_names_te, self.y_te) = utils.pkl_load(annot_path)

    self.y_tr = self.y_tr.astype(np.float32)
    self.y_te = self.y_te.astype(np.float32)

    self.is_shuffle_tr = is_shuffle_tr
    self.is_shuffle_te = is_shuffle_te

    self.img_names_tr = np.array(['%s/%s' % (img_root_path, n) for n in self.img_names_tr])
    self.img_names_te = np.array(['%s/%s' % (img_root_path, n) for n in self.img_names_te])
def _03_mean_std_of_nodes():
    sns.set_style('whitegrid')
    sns.set(style='darkgrid')  # white, dark, whitegrid, darkgrid, ticks

    n_epochs = 100
    node_dim = 1024
    n_centroids = 128
    model_name = 'classifier_19.02.21-01:00:30'
    nodes_root_path = Pth('Breakfast/qualitative_results/node_embedding_%s' % (model_name,))

    # load nodes from files
    nodes = []
    nodes_file_pathes = utils.file_pathes(nodes_root_path, is_nat_sort=True)
    for i in range(n_epochs):
        n = utils.pkl_load(nodes_file_pathes[i])
        nodes.append(n)
    nodes = np.array(nodes)  # (100, 128, 1024)

    # mean pairwise euclidean distance between the (L1-normalized) nodes of each epoch
    distances = []
    for i in range(n_epochs):
        n = nodes[i]
        n = utils.normalize_l1(n)
        d = distance.cdist(n, n, metric='euclidean')
        d = np.mean(d)
        distances.append(d)
    distances = np.array(distances)

    fig, ax = plt.subplots(nrows=1, ncols=1, num=1, figsize=(4, 2))
    colors = plot_utils.tableau_category10()
    ax.set_title('')

    y = distances
    x = np.arange(1, n_epochs + 1)

    # fit a 4th-degree polynomial as a smooth trend line, with a constant band around it
    fit_fn = np.poly1d(np.polyfit(x, y, 4))
    y_fit = fit_fn(x)
    sigma = 0.005

    plt.fill_between(x, y_fit + sigma, y_fit - sigma, facecolor=colors[0], alpha=0.25)
    ax.plot(x, y, '.', c=colors[0], markersize=9, alpha=1.0)
    ax.plot(x, y_fit, color='black', lw=1)

    plt.tight_layout()
    plt.subplots_adjust(left=0.2, right=0.9, top=0.9, bottom=0.3)
    plt.grid(False)
    plt.xlabel('Epoch Number')
    plt.ylabel('Distance')
    plt.show()
def _02_plot_nodes_over_epochs():
    sns.set_style('whitegrid')
    sns.set(style='darkgrid')

    n_epochs = 50
    node_dim = 1024
    n_centroids = 128

    # for plotting
    is_async_tsne = True
    window_size = 15
    n_max_centroids = 40

    model_name = 'classifier_19.02.21-01:00:30'
    nodes_root_path = Pth('Breakfast/qualitative_results/node_embedding_%s' % (model_name,))

    # load nodes from files
    nodes = []
    nodes_file_pathes = utils.file_pathes(nodes_root_path, is_nat_sort=True)
    for i in range(n_epochs):
        n = utils.pkl_load(nodes_file_pathes[i])
        nodes.append(n)
    nodes = np.array(nodes)  # (50, 128, 1024)
    nodes = nodes[:, 0:n_max_centroids]  # (50, 40, 1024)
    n_centroids = n_max_centroids
    print(nodes.shape)

    # embed the nodes of the first and last window of epochs
    nodes_1 = nodes[:window_size]
    nodes_2 = nodes[-window_size:]
    print(nodes_1.shape)
    print(nodes_2.shape)

    nodes_1 = np.reshape(nodes_1, (-1, node_dim))
    nodes_2 = np.reshape(nodes_2, (-1, node_dim))
    print(nodes_1.shape)
    print(nodes_2.shape)

    nodes_1 = __async_tsne_embedding(nodes_1) if is_async_tsne else utils.learn_manifold(c.MANIFOLD_TYPES[0], nodes_1)
    nodes_2 = __async_tsne_embedding(nodes_2) if is_async_tsne else utils.learn_manifold(c.MANIFOLD_TYPES[0], nodes_2)

    nodes_1 = np.reshape(nodes_1, (window_size, n_centroids, 2))  # (15, 40, 2)
    nodes_2 = np.reshape(nodes_2, (window_size, n_centroids, 2))  # (15, 40, 2)
    print(nodes_1.shape)
    print(nodes_2.shape)

    # colors = plot_utils.tableau_category20()
    colors = plot_utils.colors_256()
    colors_1 = colors[:n_centroids]
    colors_2 = colors[n_centroids + 1: n_centroids + n_centroids + 1]

    __plot_centroids(nodes_1, window_size, n_centroids, colors_1, 1)
    __plot_centroids(nodes_2, window_size, n_centroids, colors_2, 2)
def __init__(self, n_timesteps, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False):
    """
    :param n_timesteps: How many timesteps per video.
    :param is_random_tr: Sample random or uniform frames for training.
    :param is_random_te: Sample random or uniform frames for testing.
    :param is_shuffle_tr: Whether to shuffle the training data.
    :param is_shuffle_te: Whether to shuffle the test data.
    """

    self.__is_random_tr = is_random_tr
    self.__is_random_te = is_random_te
    self.__is_shuffle_tr = is_shuffle_tr
    self.__is_shuffle_te = is_shuffle_te
    self.__n_timesteps = n_timesteps

    frames_dict_path = Pth('Charades/annotation/frames_dict_all_frames.pkl')
    annotation_path = Pth('Charades/annotation/video_annotation.pkl')

    (self.__video_frames_dict_tr, self.__video_frames_dict_te) = utils.pkl_load(frames_dict_path)
    (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(annotation_path)

    self.__y_tr = self.__y_tr.astype(np.float32)
    self.__y_te = self.__y_te.astype(np.float32)