def _401_pickle_features_i3d_mixed_5c():
    n_frames_per_video = 512
    features_root_path = Pth('Breakfast/features_i3d_mixed_5c_%d_frames', (n_frames_per_video,))
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_frames_per_video,))
    video_ids_path = Pth('Breakfast/annotation/video_ids_split.pkl')

    (video_ids_tr, video_ids_te) = utils.pkl_load(video_ids_path)
    n_tr = len(video_ids_tr)
    n_te = len(video_ids_te)

    n_frames_per_segment = 8
    n_segments = int(n_frames_per_video / n_frames_per_segment)
    assert n_segments * n_frames_per_segment == n_frames_per_video

    # pre-allocate the feature tensors for the train/test splits
    f_tr = np.zeros((n_tr, n_segments, 7, 7, 1024), dtype=np.float16)
    f_te = np.zeros((n_te, n_segments, 7, 7, 1024), dtype=np.float16)

    # load the per-video pickled features into the train tensor
    for i in range(n_tr):
        utils.print_counter(i, n_tr, 100)
        p = '%s/%s.pkl' % (features_root_path, video_ids_tr[i])
        f = utils.pkl_load(p)  # (T, 7, 7, 1024)
        f_tr[i] = f

    # load the per-video pickled features into the test tensor
    for i in range(n_te):
        utils.print_counter(i, n_te, 100)
        p = '%s/%s.pkl' % (features_root_path, video_ids_te[i])
        f = utils.pkl_load(p)  # (T, 7, 7, 1024)
        f_te[i] = f

    print(f_tr.shape)
    print(f_te.shape)
    print(utils.get_size_in_gb(utils.get_array_memory_size(f_tr)))
    print(utils.get_size_in_gb(utils.get_array_memory_size(f_te)))

    # dump both splits into a single h5 file
    data_names = ['x_tr', 'x_te']
    utils.h5_dump_multi((f_tr, f_te), data_names, features_path)
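# The h5 helpers used above (utils.h5_dump_multi / utils.h5_load_multi) are project-internal.
# Below is a minimal sketch of what they are assumed to do, written with h5py; the names and
# the lack of compression are assumptions, not the repo's actual implementation.
import h5py

def h5_dump_multi_sketch(arrays, names, path):
    # write several numpy arrays into one h5 file, one dataset per name
    with h5py.File(path, 'w') as f:
        for arr, name in zip(arrays, names):
            f.create_dataset(name, data=arr)

def h5_load_multi_sketch(path, names):
    # read the named datasets back as numpy arrays, in the given order
    with h5py.File(path, 'r') as f:
        return tuple(f[name][()] for name in names)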
def _06_get_graph_edges():
    # load data
    n_timesteps = 64
    n_centroids = 128
    is_max_layer = True
    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))

    if is_max_layer:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_%s.h5', (model_name,))
        edge_pooled_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_reduced_%s.pkl', (model_name,))
        layer_name = 'pool_t_1'
        n_timesteps = 21
        n_nodes = 10
    else:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_%s.h5', (model_name,))
        edge_pooled_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_reduced_%s.pkl', (model_name,))
        layer_name = 'leaky_re_lu_3'
        n_timesteps = 64
        n_nodes = 32

    v_input_n = utils.pkl_load(centroids_path)
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    epoch_num = 133
    batch_size = 40
    model = __load_model(model_name, epoch_num)

    t_input_n = model.get_layer('input_n').input
    t_input_x = model.get_layer('input_x').input
    t_activations = model.get_layer(layer_name).output  # (None * 64, 32, 1, 1, 1024)
    keras_session = K.get_session()

    # 1357 train, 355 test
    vals_tr = __get_tensor_values(batch_size, keras_session, t_activations, t_input_n, t_input_x, v_input_n, x_tr)  # (None * 64, 32, 1, 1, 1024)
    vals_te = __get_tensor_values(batch_size, keras_session, t_activations, t_input_n, t_input_x, v_input_n, x_te)  # (None * 64, 32, 1, 1, 1024)

    # squeeze the two singleton spatial dimensions
    vals_tr = np.squeeze(vals_tr, axis=2)
    vals_tr = np.squeeze(vals_tr, axis=2)
    vals_te = np.squeeze(vals_te, axis=2)
    vals_te = np.squeeze(vals_te, axis=2)

    n_tr = 1357
    n_te = 355

    if is_max_layer:
        vals_tr = np.reshape(vals_tr, (n_tr, n_nodes, n_timesteps, 1024))  # (None, nodes, timesteps, feat_size), (1357, 10, 21, 1024)
        vals_te = np.reshape(vals_te, (n_te, n_nodes, n_timesteps, 1024))  # (None, nodes, timesteps, feat_size), (355, 10, 21, 1024)
    else:
        vals_tr = np.reshape(vals_tr, (n_tr, n_timesteps, n_nodes, 1024))  # (None, timesteps, nodes, feat_size), (1357, 64, 32, 1024)
        vals_te = np.reshape(vals_te, (n_te, n_timesteps, n_nodes, 1024))  # (None, timesteps, nodes, feat_size), (355, 64, 32, 1024)

    print('finally')
    print(x_tr.shape)
    print(x_te.shape)
    print(vals_tr.shape)
    print(vals_te.shape)

    # dump the raw edge activations, then a feature-pooled version for qualitative analysis
    utils.h5_dump_multi((vals_tr, vals_te), ['x_tr', 'x_te'], edge_values_path)

    vals_tr = np.mean(vals_tr, axis=3)
    vals_te = np.mean(vals_te, axis=3)
    utils.pkl_dump((vals_tr, vals_te), edge_pooled_values_path)
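# __get_tensor_values is a project-internal helper that is not shown here. A plausible sketch,
# assuming it simply evaluates the requested activation tensor batch by batch with the Keras/TF
# session; the signature mirrors the call above, but the body is an assumption.
def __get_tensor_values_sketch(batch_size, keras_session, t_activations, t_input_n, t_input_x, v_input_n, x):
    vals = []
    n = len(x)
    n_batches = int(np.ceil(n / float(batch_size)))
    for idx in range(n_batches):
        x_b = x[idx * batch_size:(idx + 1) * batch_size]
        # feed the centroids (identical for every batch) and the current feature batch
        v_b = keras_session.run(t_activations, feed_dict={t_input_n: v_input_n, t_input_x: x_b})
        vals.append(v_b)
    return np.concatenate(vals, axis=0)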
model = model.cuda()
model.load_state_dict(torch.load(path_model))
model.eval()

batch_size = 32

# run the actual feed-forward here: one pass for the attention values, one for the predictions
alphas = pytorch_utils.batched_feedforward_multi(model, [x_te, x_te_c1, x_te_c2, x_te_c3, x_te_c4], batch_size, func_name='forward_for_alpha')
y_pred_te = pytorch_utils.batched_feedforward_multi(model, [x_te, x_te_c1, x_te_c2, x_te_c3, x_te_c4], batch_size, func_name='inference')
utils.h5_dump_multi((alphas, y_pred_te), ['alphas', 'y_pred_te'], path_save)

print('shape_of_result', y_pred_te.shape)
print('shape_of_alphas', alphas.shape)

alphas = np.mean(alphas, 2)
print('shape_of_alphas', alphas.shape)
print('mean:', np.mean(alphas, 0))
print('var:', np.var(alphas, 0))

# evaluate results
y_pred_te = y_te_mask * y_pred_te
y_te = y_te * y_te_mask
acc_te = metric_fn(y_pred_te, y_te)
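# pytorch_utils.batched_feedforward_multi is project-internal. A minimal sketch of the assumed
# behaviour: call a named method of the model over several input arrays in batches, without
# gradients, and return the concatenated numpy result. The name and signature are assumptions.
import numpy as np
import torch

def batched_feedforward_multi_sketch(model, inputs, batch_size, func_name):
    func = getattr(model, func_name)
    n = len(inputs[0])
    n_batches = int(np.ceil(n / float(batch_size)))
    outputs = []
    with torch.no_grad():
        for idx in range(n_batches):
            s, e = idx * batch_size, (idx + 1) * batch_size
            # slice every input array the same way and move the batch to the GPU
            batch = [torch.from_numpy(x[s:e]).cuda() for x in inputs]
            y = func(*batch)
            outputs.append(y.cpu().numpy())
    return np.concatenate(outputs, axis=0)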
def _401_extract_features_i3d():
    """
    Extract I3D features.
    :return:
    """
    n_threads = 32
    n_frames_per_segment = 8
    n_segments_per_video = 64
    n_frames_per_video = n_segments_per_video * n_frames_per_segment

    video_names_splits_path = Pth('EPIC-Kitchens/annotation/video_names_splits.pkl')
    sampled_frames_relative_pathes = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_uniform_sample.pkl')
    frames_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train')
    features_path = Pth('EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5', (n_frames_per_video,))

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    (sampled_frames_tr, sampled_frames_te) = utils.pkl_load(sampled_frames_relative_pathes)
    video_names = np.hstack((video_names_tr, video_names_te))
    sampled_frames = np.vstack((sampled_frames_tr, sampled_frames_te))
    n_videos = len(video_names)
    n_tr = len(video_names_tr)

    # start loading the frames of the first video asynchronously
    img_reader = image_utils.AsyncImageReaderEpicKitchensForI3dKerasModel(n_threads=n_threads)
    v_frames_pathes = np.array(['%s/%s' % (frames_root_path, p) for p in sampled_frames[0]])
    img_reader.load_imgs_in_batch(v_frames_pathes)

    model = Inception_Inflated3d_Backbone()
    model.summary()
    model = multi_gpu_utils.multi_gpu_model(model, 4)

    dataset_features = np.zeros((n_videos, n_segments_per_video, 7, 7, 1024), dtype=np.float32)

    # loop on videos, extract features and save them
    for idx_video, v_name in enumerate(video_names):
        video_num = idx_video + 1

        # wait until the image batch is loaded
        t1 = time.time()
        while img_reader.is_busy():
            time.sleep(0.1)
        t2 = time.time()
        duration_waited = t2 - t1
        print('...... video %d/%d, waited: %d' % (video_num, n_videos, duration_waited))

        # get the video frames
        video_frames = img_reader.get_images()

        # reshape to get the segments in one dimension
        frames_shape = video_frames.shape
        frames_shape = [n_segments_per_video, n_frames_per_segment] + list(frames_shape[1:])
        video_frames = np.reshape(video_frames, frames_shape)

        # pre-load the frames of the next video
        if video_num < n_videos:
            v_frames_pathes = np.array(['%s/%s' % (frames_root_path, p) for p in sampled_frames[idx_video + 1]])
            img_reader.load_imgs_in_batch(v_frames_pathes)

        # extract features
        features = model.predict(video_frames, verbose=0)

        # remove the temporal dimension
        features = np.squeeze(features, axis=1)

        # store the features of this video
        dataset_features[idx_video] = features

    # split features
    features_tr = dataset_features[:n_tr]
    features_te = dataset_features[n_tr:]
    print(features_tr.shape)
    print(features_te.shape)

    # save features
    utils.h5_dump_multi((features_tr, features_te), ['x_tr', 'x_te'], features_path)
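# AsyncImageReaderEpicKitchensForI3dKerasModel is a project-internal helper. A rough sketch of
# the assumed contract (load_imgs_in_batch / is_busy / get_images): prefetch and preprocess the
# next video's frames in the background while the GPU works on the current one. The class name,
# preprocessing, and single background thread here are assumptions (the real reader presumably
# parallelises across n_threads workers).
import threading
import numpy as np
import cv2

class AsyncImageReaderSketch(object):
    def __init__(self, img_size=224):
        self.__img_size = img_size
        self.__images = None
        self.__is_busy = False

    def load_imgs_in_batch(self, img_pathes):
        # spawn a background thread so the caller can keep the GPU busy meanwhile
        self.__is_busy = True
        thread = threading.Thread(target=self.__load, args=(img_pathes,))
        thread.daemon = True
        thread.start()

    def __load(self, img_pathes):
        imgs = np.zeros((len(img_pathes), self.__img_size, self.__img_size, 3), dtype=np.float32)
        for i, p in enumerate(img_pathes):
            img = cv2.imread(p)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (self.__img_size, self.__img_size))
            imgs[i] = (img / 127.5) - 1.0  # I3D-style scaling to [-1, 1], an assumption
        self.__images = imgs
        self.__is_busy = False

    def is_busy(self):
        return self.__is_busy

    def get_images(self):
        return self.__images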