def _804_random_sample_frames_for_i3d_test_video_level_by_split(split_type='train'):
    assert split_type in ['train', 'test'], 'Sorry, unknown split type: %s' % (split_type)

    is_train = split_type == 'train'
    file_name_suffix = 'tr' if is_train else 'te'
    root_path_id = 0 if configs.is_local_machine() else 5
    frames_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train', root_type=c.ROOT_PATH_TYPES[root_path_id])
    frame_relative_pathes_dict_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_%s.pkl', (file_name_suffix,))
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    video_names = video_names_tr if is_train else video_names_te
    del video_names_tr
    del video_names_te

    frame_relative_pathes_dict = utils.pkl_load(frame_relative_pathes_dict_path)

    # loop on the videos and sample frames for i3d
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    sampled_frames = __random_sample_frames_per_video_for_i3d(video_names, frames_root_path, frame_relative_pathes_dict, n_frames_per_segment, n_frames_per_video)
    return sampled_frames
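# A minimal sketch of the private sampler that _804 above and _803 below rely on.
# The signature is taken from the call sites; the body is an assumption: sample
# n_frames_per_video / n_frames_per_segment segments per video at sorted random
# start offsets, then return absolute frame paths.
def __random_sample_frames_per_video_for_i3d(video_names, frames_root_path, frame_relative_pathes_dict, n_frames_per_segment, n_frames_per_video):
    n_segments = int(n_frames_per_video / n_frames_per_segment)
    sampled_frames = dict()
    for video_name in video_names:
        relative_pathes = frame_relative_pathes_dict[video_name]
        n_frames = len(relative_pathes)
        # random, sorted segment starts; clamp indices so each segment stays in range
        idx_start = np.sort(np.random.randint(0, max(1, n_frames - n_frames_per_segment), (n_segments,)))
        idx = np.hstack([np.arange(s, s + n_frames_per_segment) for s in idx_start])
        idx = np.minimum(idx, n_frames - 1)
        sampled_frames[video_name] = np.array(['%s/%s' % (frames_root_path, p) for p in relative_pathes[idx]])
    return sampled_frames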
def __init__(self, n_timesteps, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False):
    """
    :param n_timesteps: How many timesteps per video.
    :param is_random_tr: Sample random or uniform frames.
    :param is_random_te: Sample random or uniform frames.
    :param is_shuffle_tr: To shuffle data or not.
    :param is_shuffle_te: To shuffle data or not.
    """
    frames_dict_path = Pth('Charades/annotation/frames_dict_all_frames.pkl')
    annotation_path = Pth('Charades/annotation/video_annotation.pkl')

    self.__is_random_tr = is_random_tr
    self.__is_random_te = is_random_te
    self.__is_shuffle_tr = is_shuffle_tr
    self.__is_shuffle_te = is_shuffle_te

    self.__n_timesteps = n_timesteps
    self.__n_frames_per_segment = 8
    self.__n_frames = self.__n_timesteps * self.__n_frames_per_segment

    (self.__video_frames_dict_tr, self.__video_frames_dict_te) = utils.pkl_load(frames_dict_path)
    (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(annotation_path)

    self.current_train = None
    self.current_test = None
def _800_prepare_video_frames_path_dict():
    frame_relative_pathes_dict_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict.pkl')
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')
    imgs_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    video_names = np.hstack((video_names_tr, video_names_te))

    frame_relative_pathes_dict = {}
    n_videos = len(video_names)
    for idx, video_id in enumerate(video_names):
        utils.print_counter(idx, n_videos)
        person_id = video_id.split('_')[0]
        video_frames_root_path = '%s/%s/%s' % (imgs_root_path, person_id, video_id)
        video_frames_names = utils.file_names(video_frames_root_path, is_nat_sort=True)
        video_frames_names = np.array(video_frames_names)
        video_frames_relative_pathes = np.array(['%s/%s/%s' % (person_id, video_id, n) for n in video_frames_names])
        frame_relative_pathes_dict[video_id] = video_frames_relative_pathes

    utils.pkl_dump(frame_relative_pathes_dict, frame_relative_pathes_dict_path)
def _802_uniform_sample_frames_for_i3d_test_video_level():
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')
    frame_relative_pathes_dict_tr_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_te.pkl')
    sampled_frames_relative_pathes = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_uniform_sample.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frame_relative_pathes_dict_tr = utils.pkl_load(frame_relative_pathes_dict_tr_path)
    frame_relative_pathes_dict_te = utils.pkl_load(frame_relative_pathes_dict_te_path)

    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    sampled_frames_tr = __uniform_sample_frames_per_video_for_i3d(video_names_tr, frame_relative_pathes_dict_tr, n_frames_per_segment, n_frames_per_video)
    sampled_frames_te = __uniform_sample_frames_per_video_for_i3d(video_names_te, frame_relative_pathes_dict_te, n_frames_per_segment, n_frames_per_video)

    data = (sampled_frames_tr, sampled_frames_te)
    utils.pkl_dump(data, sampled_frames_relative_pathes)
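# A hedged sketch of the uniform counterpart used by _802 above: same segment
# layout as the random sampler, but segment starts are spaced evenly over the
# video, and relative (not absolute) paths are returned, matching the call site.
# The body is an assumption reconstructed from the signature.
def __uniform_sample_frames_per_video_for_i3d(video_names, frame_relative_pathes_dict, n_frames_per_segment, n_frames_per_video):
    n_segments = int(n_frames_per_video / n_frames_per_segment)
    sampled_frames = dict()
    for video_name in video_names:
        relative_pathes = frame_relative_pathes_dict[video_name]
        n_frames = len(relative_pathes)
        # evenly spaced segment starts; clamp so each segment stays in range
        idx_start = np.linspace(0, max(0, n_frames - n_frames_per_segment), n_segments).astype(np.int32)
        idx = np.hstack([np.arange(s, s + n_frames_per_segment) for s in idx_start])
        idx = np.minimum(idx, n_frames - 1)
        sampled_frames[video_name] = relative_pathes[idx]
    return sampled_frames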
def _05_visualize_attention_values():
    # load data
    n_timesteps = 64
    n_centroids = 128
    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    frames_annot_path = Pth('Breakfast/annotation/annot_frames_i3d_%d.pkl', (512,))
    attention_values_path = Pth('Breakfast/qualitative_results/node_attention_%s.pkl', (model_name,))
    n_classes = ds_breakfast.N_CLASSES_ACTIVITIES

    frames_annot = utils.pkl_load(frames_annot_path)
    (video_ids_tr, y_tr), (video_ids_te, y_te) = utils.pkl_load(gt_activities_path)
    y_tr = utils.debinarize_label(y_tr)
    y_te = utils.debinarize_label(y_te)
    (att_tr, att_te) = utils.pkl_load(attention_values_path)  # (1357, 64, 128), (355, 64, 128)

    # per-class attention, averaged over videos and timesteps
    attentions_tr = np.array([np.average(att_tr[np.where(y_tr == idx_class)[0]], axis=(0, 1)) for idx_class in range(n_classes)])  # (10, 128)
    attentions_te = np.array([np.average(att_te[np.where(y_te == idx_class)[0]], axis=(0, 1)) for idx_class in range(n_classes)])  # (10, 128)

    # remove least attended centroids
    all_attn_vals = np.mean(attentions_tr, axis=1)
    _ = 10
def _803_random_sample_frames_for_i3d_test_video_level():
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')
    frame_relative_pathes_dict_tr_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_te.pkl')

    root_path_id = 0 if configs.is_local_machine() else 5
    frames_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train', root_type=c.ROOT_PATH_TYPES[root_path_id])

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frame_relative_pathes_dict_tr = utils.pkl_load(frame_relative_pathes_dict_tr_path)
    frame_relative_pathes_dict_te = utils.pkl_load(frame_relative_pathes_dict_te_path)

    # loop on the videos and sample frames for i3d
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    sampled_frames_tr = __random_sample_frames_per_video_for_i3d(video_names_tr, frames_root_path, frame_relative_pathes_dict_tr, n_frames_per_segment, n_frames_per_video)
    sampled_frames_te = __random_sample_frames_per_video_for_i3d(video_names_te, frames_root_path, frame_relative_pathes_dict_te, n_frames_per_segment, n_frames_per_video)
    return (sampled_frames_tr, sampled_frames_te)
def test_model_predictions_on_images():
    weight_path = Pth('Torch_Models/ResNet/resnet50_places365.pth.tar')
    category_list_path = Pth('Places365/annotation/categories_places365.txt')

    # load the class labels
    category_list = utils.txt_load(category_list_path)

    # load the pre-trained weights
    model = __load_model_pretrained(weight_path)
    model = model.cuda()
    model.eval()

    image_names = ['01.jpg', '02.jpg', '03.jpg', '12.jpg']
    for image_name in image_names:
        image_path = '/home/nour/Pictures/scene_images/%s' % image_name
        img = __read_image_preprocessed(image_path)
        img = torch.from_numpy(np.array([img])).cuda()

        # forward pass
        logit = model.forward_no_activation(img)
        h_x = F.softmax(logit, 1).data.squeeze()
        probs, idx = h_x.sort(0, True)

        print('\n prediction on {}'.format(image_name))

        # output the top-5 predictions
        for i in range(0, 5):
            print('{:.3f} -> {}'.format(probs[i], category_list[idx[i]]))
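# A hedged sketch of the preprocessing helper called above, assuming standard
# 224x224 ImageNet-style normalization in channel-first order; the exact resize
# policy and normalization constants of the original helper are assumptions.
import cv2

def __read_image_preprocessed(image_path):
    img = cv2.imread(image_path)  # BGR, uint8, (H, W, 3)
    img = cv2.resize(img, (224, 224))
    img = img[:, :, (2, 1, 0)].astype(np.float32) / 255.0  # BGR -> RGB, scale to [0, 1]
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    img = (img - mean) / std
    img = np.transpose(img, (2, 0, 1))  # HWC -> CHW, as expected by PyTorch
    return img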
def _703_prepare_data_splits():
    """
    Sample frame paths for the i3d model.
    :return:
    """
    annot_dict_path = Pth('EPIC-Kitchens/annotations/EPIC_train_action_labels_dict.pkl')
    annot_idxes_many_shots_path = Pth('EPIC-Kitchens/annotations/annot_idxes_many_shots_noun_verb.pkl')
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')

    annot_idxes_many_shots = utils.pkl_load(annot_idxes_many_shots_path)
    annot_dict = utils.pkl_load(annot_dict_path)

    # split ratio
    split_ratio = 0.8
    person_videos_dict = {}

    # first loop to collect all unique video ids per person
    for annot_id in annot_idxes_many_shots:
        annot_line = annot_dict[annot_id]
        person_id = annot_line[0]
        video_id = annot_line[1]
        if person_id not in person_videos_dict:
            person_videos_dict[person_id] = []
        person_videos_dict[person_id].append(video_id)

    for person_id in person_videos_dict:
        video_names = natsort.natsorted(np.unique(person_videos_dict[person_id]))
        person_videos_dict[person_id] = video_names

    # now that we have collected the persons and their videos, see how many videos each split gets
    video_names_tr = []
    video_names_te = []
    for person_id in person_videos_dict:
        v_names = person_videos_dict[person_id]
        idx = int(len(v_names) * split_ratio)
        v_names_tr = v_names[:idx]
        v_names_te = v_names[idx:]
        video_names_tr += v_names_tr
        video_names_te += v_names_te

    video_names_tr = np.array(video_names_tr)
    video_names_te = np.array(video_names_te)

    print(len(video_names_tr) + len(video_names_te))
    print(len(video_names_tr))
    print(len(video_names_te))

    # save video names
    utils.pkl_dump((video_names_tr, video_names_te), video_names_splits_path)
def __load_model(model_name, epoch_num):
    model_root_path = Pth('Breakfast/models/%s', (model_name,))

    custom_objects = {'tf': tf, 'ExpandDimsLayer': ExpandDimsLayer, 'MeanLayer': MeanLayer, 'MaxLayer': MaxLayer, 'TransposeLayer': TransposeLayer, 'ReshapeLayer': ReshapeLayer, 'DepthwiseConv1DLayer': DepthwiseConv1DLayer}

    json_path = '%s/%03d.json' % (model_root_path, epoch_num)
    weight_path = '%s/%03d.pkl' % (model_root_path, epoch_num)
    model = keras_utils.load_model(json_path, weight_path, custom_objects=custom_objects, is_compile=False)
    return model
def _103_prepare_video_info():
    video_info_path = Pth('Breakfast/annotation/video_info.pkl')
    annot_activities_path = Pth('Breakfast/annotation/annot_activities.pkl')
    (video_relative_pathes_tr, _, video_relative_pathes_te, _) = utils.pkl_load(annot_activities_path)

    video_relative_pathes = np.hstack((video_relative_pathes_tr, video_relative_pathes_te))
    n_videos = len(video_relative_pathes)

    video_info = dict()
    fps, n_frames, duration = [], [], []

    # loop on the videos
    for idx_video, video_relative_path in enumerate(video_relative_pathes):
        utils.print_counter(idx_video, n_videos, 100)
        video_path = Pth('Breakfast/videos/%s', (video_relative_path,))
        video_id = __video_relative_path_to_video_id(video_relative_path)
        try:
            v_fps, v_n_frames, v_duration = video_utils.get_video_info(video_path)
        except:
            # skip videos whose info can't be read
            print(video_relative_path)
            continue
        fps.append(v_fps)
        n_frames.append(v_n_frames)
        duration.append(v_duration)
        video_info[video_id] = {'duration': v_duration, 'fps': v_fps, 'n_frames': v_n_frames}

    print(np.mean(fps), np.std(fps), np.min(fps), np.max(fps))
    print(np.mean(duration), np.std(duration), np.min(duration), np.max(duration))
    print(np.mean(n_frames), np.std(n_frames), np.min(n_frames), np.max(n_frames))

    # 15.0 0.0 15.0 15.0
    # 140.30865654205607 121.76493338896255 12.4 649.67
    # 2105.308995327103 1826.5189539717755 187 9746

    utils.pkl_dump(video_info, video_info_path)
def __sample_frames(video_relative_pathes, n_frames_per_video, model_type):
    assert model_type in ['resnet', 'i3d', 'non_local']

    video_frames_dict = dict()
    n_videos = len(video_relative_pathes)
    for idx_video, video_relative_path in enumerate(video_relative_pathes):
        utils.print_counter(idx_video, n_videos, 100)
        video_id = __video_relative_path_to_video_id(video_relative_path)

        # get all frames of the video
        frames_root_path = Pth('Breakfast/frames/%s', (video_id,))
        video_frame_names = utils.file_names(frames_root_path, is_nat_sort=True)

        # sample from these frames
        if model_type == 'resnet':
            video_frame_names = __sample_frames_for_resnet(video_frame_names, n_frames_per_video)
        elif model_type == 'i3d':
            video_frame_names = __sample_frames_for_i3d(video_frame_names, n_frames_per_video)
        elif model_type == 'non_local':
            video_frame_names = __sample_frames_for_non_local(video_frame_names, n_frames_per_video)
        else:
            raise Exception('Unknown model type: %s' % (model_type))

        n_frames = len(video_frame_names)
        assert n_frames == n_frames_per_video
        video_frames_dict[video_id] = video_frame_names

    return video_frames_dict
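# A minimal sketch of the i3d branch referenced above (the resnet and non_local
# variants would differ only in segment layout). The body is an assumption:
# spread 8-frame segments uniformly over the video's frames.
def __sample_frames_for_i3d(video_frame_names, n_frames_per_video):
    n_frames_per_segment = 8
    n_segments = int(n_frames_per_video / n_frames_per_segment)
    n_frames = len(video_frame_names)
    # evenly spaced segment starts; clamp indices so each segment stays in range
    idx_start = np.linspace(0, max(0, n_frames - n_frames_per_segment), n_segments).astype(np.int32)
    idx = np.hstack([np.arange(s, s + n_frames_per_segment) for s in idx_start])
    idx = np.minimum(idx, n_frames - 1)
    return [video_frame_names[i] for i in idx]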
def __get_resne50_for_finetuning_on_hico():
    # load model and weights
    model_path = Pth('Torch_Models/ResNet/resnet50-19c8e357.pth')
    model_dict = torch.load(model_path)

    # define model
    model = ResNet50Hico()

    # load weights
    model.load_state_dict(model_dict, strict=True)

    # freeze all but the last block
    layer_names = ['bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']
    pytorch_utils.freeze_model_layers_recursive(model, layer_names)

    # prepare for fine-tuning
    model._prepare_for_finetuning()

    # as cuda
    model = model.cuda()

    loss_fn = F.binary_cross_entropy
    metric_fn = pytorch_utils.METRIC_FUNCTIONS.ap_hico
    optimizer = optim.Adam(model.parameters(), lr=0.001, eps=1e-4)

    return model, optimizer, loss_fn, metric_fn
def _105_prepare_action_gt_timestamped():
    """
    Get ground truth of unit-actions with their timestamps.
    :return:
    """
    root_path = c.DATA_ROOT_PATH
    video_ids_path = Pth('Breakfast/annotation/video_ids_split.pkl')
    unit_actions_path = Pth('Breakfast/annotation/unit_actions_list.pkl')
    gt_actions_path = Pth('Breakfast/annotation/gt_unit_actions_timestamped.pkl')

    (video_ids_tr, video_ids_te) = utils.pkl_load(video_ids_path)
    unit_actions = utils.pkl_load(unit_actions_path)

    video_pathes_tr = ['%s/Breakfast/videos/%s' % (root_path, __video_video_id_to_video_relative_path(id, False)) for id in video_ids_tr]
    video_pathes_te = ['%s/Breakfast/videos/%s' % (root_path, __video_video_id_to_video_relative_path(id, False)) for id in video_ids_te]

    gt_actions_te = __get_gt_actions_timestamped(video_pathes_te, unit_actions)
    gt_actions_tr = __get_gt_actions_timestamped(video_pathes_tr, unit_actions)

    gt_actions_tr = np.array(gt_actions_tr)
    gt_actions_te = np.array(gt_actions_te)

    l_tr = [len(i) for i in gt_actions_tr]
    l_te = [len(i) for i in gt_actions_te]
    print('mean, std, min, max for number of nodes in each video [tr/te]')
    print(np.mean(l_tr), np.std(l_tr), np.min(l_tr), np.max(l_tr))
    print(np.mean(l_te), np.std(l_te), np.min(l_te), np.max(l_te))
    print(gt_actions_tr.shape)
    print(gt_actions_te.shape)

    utils.pkl_dump(((video_ids_tr, gt_actions_tr), (video_ids_te, gt_actions_te)), gt_actions_path)
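# A hypothetical sketch of the timestamped ground-truth parser called above,
# assuming each Breakfast video has a sibling '.txt' segmentation file whose
# lines read 'start-end action_name', and that utils.txt_load returns a list of
# lines; the file layout, label format, and returned tuple structure are all
# assumptions here.
def __get_gt_actions_timestamped(video_pathes, unit_actions):
    gt_actions = []
    for video_path in video_pathes:
        annot_path = video_path.replace('.avi', '.txt')
        video_gt = []
        for line in utils.txt_load(annot_path):
            line = line.strip()
            if not line:
                continue
            frame_range, action_name = line.split(' ')
            idx_start, idx_end = [int(v) for v in frame_range.split('-')]
            idx_action = list(unit_actions).index(action_name)
            video_gt.append((idx_start, idx_end, idx_action))
        gt_actions.append(video_gt)
    return gt_actions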
def _501_generate_centroids(n_centroids, n_dims):
    c1_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    c2_path = Pth('Breakfast/features_centroids/features_sobol_%d_centroids.pkl', (n_centroids,))

    # centroids as random vectors
    c1 = np.random.rand(n_centroids, n_dims)

    # centroids as sobol sequence
    c2 = sobol.sobol_generate(n_dims, n_centroids)
    c2 = np.array(c2)

    # save centroids
    utils.pkl_dump(c1, c1_path)
    utils.pkl_dump(c2, c2_path)
def __init__(self, features_root_path, n_timesteps, n_timesteps_total, is_random_tr, is_random_te):
    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(gt_activities_path)

    self.__feature_root_path = features_root_path
    self.__n_timesteps_total = n_timesteps_total
    self.__n_timesteps = n_timesteps
    self.__is_random_tr = is_random_tr
    self.__is_random_te = is_random_te
def _602_generate_nodes(n_nodes, n_dims):
    n1_path = Pth('EPIC-Kitchens/features_centroid/features_random_%d.pkl', (n_nodes,))
    n2_path = Pth('EPIC-Kitchens/features_centroid/features_sobol_%d.pkl', (n_nodes,))

    # nodes as random vectors
    n1 = np.random.rand(n_nodes, n_dims)

    # nodes as sobol sequence
    n2 = sobol.sobol_generate(n_dims, n_nodes)
    n2 = np.array(n2)

    print(n1.shape)
    print(n2.shape)

    # save nodes
    utils.pkl_dump(n1, n1_path)
    utils.pkl_dump(n2, n2_path)
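# Example usage (a sketch): generate 128 nodes of dimensionality 1024 (chosen to
# match the i3d mixed_5c channels used elsewhere in this repo; the value is an
# assumption here), then load the sobol variant back for inspection.
_602_generate_nodes(128, 1024)
nodes = utils.pkl_load(Pth('EPIC-Kitchens/features_centroid/features_sobol_%d.pkl', (128,)))
print(nodes.shape)  # expected: (128, 1024)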
def _202_spit_video_frames_relative_pathes():
    video_names_splits_path = Pth('EPIC-Kitchens/annotation/video_names_splits.pkl')
    frame_relative_pathes_dict_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict.pkl')
    frame_relative_pathes_dict_tr_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict_te.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frames_dict = utils.pkl_load(frame_relative_pathes_dict_path)

    dict_tr = dict()
    dict_te = dict()

    for v_name in video_names_tr:
        dict_tr[v_name] = frames_dict[v_name]

    for v_name in video_names_te:
        dict_te[v_name] = frames_dict[v_name]

    utils.pkl_dump(dict_tr, frame_relative_pathes_dict_tr_path)
    utils.pkl_dump(dict_te, frame_relative_pathes_dict_te_path)
def _401_pickle_features_i3d_mixed_5c():
    n_frames_per_video = 512
    features_root_path = Pth('Breakfast/features_i3d_mixed_5c_%d_frames', (n_frames_per_video,))
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_frames_per_video,))
    video_ids_path = Pth('Breakfast/annotation/video_ids_split.pkl')

    (video_ids_tr, video_ids_te) = utils.pkl_load(video_ids_path)
    n_tr = len(video_ids_tr)
    n_te = len(video_ids_te)

    n_frames_per_segment = 8
    n_segments = int(n_frames_per_video / n_frames_per_segment)
    assert n_segments * n_frames_per_segment == n_frames_per_video

    f_tr = np.zeros((n_tr, n_segments, 7, 7, 1024), dtype=np.float16)
    f_te = np.zeros((n_te, n_segments, 7, 7, 1024), dtype=np.float16)

    for i in range(n_tr):
        utils.print_counter(i, n_tr, 100)
        p = '%s/%s.pkl' % (features_root_path, video_ids_tr[i])
        f = utils.pkl_load(p)  # (T, 7, 7, 1024)
        f_tr[i] = f

    for i in range(n_te):
        utils.print_counter(i, n_te, 100)
        p = '%s/%s.pkl' % (features_root_path, video_ids_te[i])
        f = utils.pkl_load(p)  # (T, 7, 7, 1024)
        f_te[i] = f

    print(f_tr.shape)
    print(f_te.shape)

    print(utils.get_size_in_gb(utils.get_array_memory_size(f_tr)))
    print(utils.get_size_in_gb(utils.get_array_memory_size(f_te)))

    data_names = ['x_tr', 'x_te']
    utils.h5_dump_multi((f_tr, f_te), data_names, features_path)
def _201_extract_frames():
    """
    Extract all frames from all videos in the dataset.
    Make sure to download the dataset from
    http://serre-lab.clps.brown.edu/resource/breakfast-actions-dataset/#Downloads
    then save all videos under one directory. For example, for person "P05", the videos are stored as:
        Breakfast/videos/P05/stereo/P05_milk_ch0.avi
        Breakfast/videos/P05/stereo/P05_milk_ch1.avi
        Breakfast/videos/P05/stereo/........
        Breakfast/videos/P05/cam01/P05_cereals.avi
        Breakfast/videos/P05/cam01/P05_coffee.avi
        Breakfast/videos/P05/cam01/....
    and so forth.
    """
    annot_activities_path = Pth('Breakfast/annotation/annot_activities.pkl')
    (video_relative_pathes_tr, _, video_relative_pathes_te, _) = utils.pkl_load(annot_activities_path)

    video_relative_pathes = np.hstack((video_relative_pathes_tr, video_relative_pathes_te))
    n_videos = len(video_relative_pathes)

    image_name_format = '%s/%06d.jpg'
    for idx_video, video_relative_path in enumerate(video_relative_pathes):
        t1 = time.time()
        video_id = __video_relative_path_to_video_id(video_relative_path)
        video_path = Pth('Breakfast/videos/%s', (video_relative_path,))

        # path to store the video frames
        video_frames_root_path = Pth('Breakfast/frames/%s', (video_id,))
        if not os.path.exists(video_frames_root_path):
            os.mkdir(video_frames_root_path)

        # save all frames to disc
        video_utils.video_save_frames(video_path, video_frames_root_path, image_name_format, c.RESIZE_TYPES[1])
        t2 = time.time()
        duration = t2 - t1
        print('%03d/%03d, %d sec' % (idx_video + 1, n_videos, duration))
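# A hypothetical sketch of the id helper used by _103, __sample_frames, and
# _201 above, assuming the id joins the file stem with the camera directory
# (e.g. 'P05/cam01/P05_cereals.avi' -> 'P05_cereals_cam01'); the repo's real
# naming convention may differ.
def __video_relative_path_to_video_id(video_relative_path):
    splits = video_relative_path.split('/')
    camera = splits[-2]
    file_stem = os.path.splitext(splits[-1])[0]
    return '%s_%s' % (file_stem, camera)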
def _01_get_nodes_over_epochs():
    """
    Get centroids of the model.
    :return:
    """
    n_centroids = 128
    n_epochs = 100
    model_name = 'classifier_19.02.21-01:00:30'
    model_root_path = Pth('Breakfast/models/%s', (model_name,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    nodes_root_path = Pth('Breakfast/qualitative_results/node_embedding_%s' % (model_name,))

    v_input_nodes = utils.pkl_load(centroids_path)

    model = None
    t_input_nodes = None
    t_node_embedding = None
    keras_session = K.get_session()

    for idx_epoch in range(n_epochs):
        utils.print_counter(idx_epoch, n_epochs)

        epoch_num = idx_epoch + 1
        weight_path = '%s/%03d.pkl' % (model_root_path, epoch_num)

        # build the model once, then only reload weights for later epochs
        if epoch_num == 1:
            model = __load_model(model_name, epoch_num)
            t_input_nodes = model.get_layer('input_n').input
            t_node_embedding = model.get_layer('node_embedding').output
        else:
            model.load_weights(weight_path)

        v_node_embedding, = keras_session.run([t_node_embedding], {t_input_nodes: v_input_nodes})  # (1, 128, 1024)
        v_node_embedding = np.squeeze(v_node_embedding, axis=0)  # (128, 1024)
        path = '%s/%02d.pkl' % (nodes_root_path, epoch_num)
        utils.pkl_dump(v_node_embedding, path)
def __init__(self, img_root_path, is_shuffle_tr=True, is_shuffle_te=False):
    annot_path = Pth('Hico/annotation/anno_hico.pkl')
    (self.img_names_tr, self.y_tr, self.img_names_te, self.y_te) = utils.pkl_load(annot_path)

    self.y_tr = self.y_tr.astype(np.float32)
    self.y_te = self.y_te.astype(np.float32)

    self.is_shuffle_tr = is_shuffle_tr
    self.is_shuffle_te = is_shuffle_te

    self.img_names_tr = np.array(['%s/%s' % (img_root_path, n) for n in self.img_names_tr])
    self.img_names_te = np.array(['%s/%s' % (img_root_path, n) for n in self.img_names_te])