def _802_uniform_sample_frames_for_i3d_test_video_level():
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')
    frame_relative_pathes_dict_tr_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict_te.pkl')
    sampled_frames_relative_pathes = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_uniform_sample.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frame_relative_pathes_dict_tr = utils.pkl_load(frame_relative_pathes_dict_tr_path)
    frame_relative_pathes_dict_te = utils.pkl_load(frame_relative_pathes_dict_te_path)

    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    sampled_frames_tr = __uniform_sample_frames_per_video_for_i3d(video_names_tr, frame_relative_pathes_dict_tr, n_frames_per_segment, n_frames_per_video)
    sampled_frames_te = __uniform_sample_frames_per_video_for_i3d(video_names_te, frame_relative_pathes_dict_te, n_frames_per_segment, n_frames_per_video)
    data = (sampled_frames_tr, sampled_frames_te)
    utils.pkl_dump(data, sampled_frames_relative_pathes)
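# `__uniform_sample_frames_per_video_for_i3d` is called above but not defined in this
# section. Below is a minimal sketch, assuming it picks uniformly spaced segments of
# `n_frames_per_segment` successive frames per video; the signature matches the call
# sites, but the body is an assumption, not the repo's actual implementation.
import numpy as np

def __uniform_sample_frames_per_video_for_i3d(video_names, frame_pathes_dict, n_frames_per_segment, n_frames_per_video):
    """Sketch only: uniformly sample segments of successive frames per video."""
    n_segments = n_frames_per_video // n_frames_per_segment
    sampled = dict()
    for video_name in video_names:
        frames = np.asarray(frame_pathes_dict[video_name])
        n = len(frames)
        # uniformly spaced segment start indices, clipped so each segment fits
        starts = np.linspace(0, max(n - n_frames_per_segment, 0), n_segments, dtype=int)
        idx = np.hstack([np.arange(s, s + n_frames_per_segment) for s in starts])
        idx = np.clip(idx, 0, n - 1)  # guard against very short videos
        sampled[video_name] = frames[idx]
    return sampled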
def _08_prepare_annotation_frames_per_video_dict_multi_label():
    """
    Get the list of frames for each video. Here, exactly 100 frames are sampled
    per video (the min and max are both set to 100). These frames will be used
    to extract features for each video.
    """
    min_frames_per_video = 100
    max_frames_per_video = 100

    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (root_path)
    annotation_path = '%s/Charades/annotation/frames_dict_multi_label.pkl' % (root_path)

    video_frames_dict_tr = __get_frame_names_from_csv_file(annot_tr_text_path, min_frames_per_video, max_frames_per_video)
    video_frames_dict_te = __get_frame_names_from_csv_file(annot_te_text_path, min_frames_per_video, max_frames_per_video)
    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te), annotation_path, is_highest=True)
def _800_prepare_video_frames_path_dict():
    frame_relative_pathes_dict_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict.pkl')
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')
    imgs_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    video_names = np.hstack((video_names_tr, video_names_te))

    frame_relative_pathes_dict = {}
    n_videos = len(video_names)
    for idx, video_id in enumerate(video_names):
        utils.print_counter(idx, n_videos)
        person_id = video_id.split('_')[0]
        video_frames_root_path = '%s/%s/%s' % (imgs_root_path, person_id, video_id)
        video_frames_names = utils.file_names(video_frames_root_path, is_nat_sort=True)
        video_frames_names = np.array(video_frames_names)
        video_frames_relative_pathes = np.array(['%s/%s/%s' % (person_id, video_id, n) for n in video_frames_names])
        frame_relative_pathes_dict[video_id] = video_frames_relative_pathes

    utils.pkl_dump(frame_relative_pathes_dict, frame_relative_pathes_dict_path)
def _12_prepare_annotation_frames_per_video_dict_multi_label_all_frames():
    """
    Get the list of frames for each video; all frames of each video are kept.
    """
    n_frames_per_video = None

    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (root_path)
    annotation_path = '%s/Charades/annotation/frames_dict_multi_label_all_frames.pkl' % (root_path)

    video_frames_dict_tr = __get_frame_names_from_csv_file(annot_tr_text_path, n_frames_per_video, n_frames_per_video, sampling=False)
    video_frames_dict_te = __get_frame_names_from_csv_file(annot_te_text_path, n_frames_per_video, n_frames_per_video, sampling=False)
    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te), annotation_path, is_highest=True)
def _13_prepare_annotation_frames_per_video_dict_untrimmed_multi_label_for_i3d(n_frames_per_video):
    """
    Uniformly sample sequences of frames from each video. Each sequence
    consists of 8 successive frames.
    n_frames_per_video = 1024 || 512 || 256
    """
    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (root_path)
    annotation_path = '%s/Charades/annotation/frames_dict_untrimmed_multi_label_i3d_%d_frames.pkl' % (root_path, n_frames_per_video)

    # sample: every 8 successive frames form one video segment
    video_frames_dict_tr = __get_frame_names_untrimmed_from_csv_file_for_i3d(annot_tr_text_path, n_frames_per_video)
    video_frames_dict_te = __get_frame_names_untrimmed_from_csv_file_for_i3d(annot_te_text_path, n_frames_per_video)
    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te), annotation_path, is_highest=True)
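# `__get_frame_names_untrimmed_from_csv_file_for_i3d` is not shown in this section.
# A sketch under assumptions: the Charades csv has a 'length' column in seconds,
# frames are extracted at 24 fps (an assumption), and frame names follow the
# '<video_id>-%06d.jpg' convention seen elsewhere in this file. Not the repo's
# actual implementation.
import csv
import numpy as np

def __get_frame_names_untrimmed_from_csv_file_for_i3d(annot_text_path, n_frames_per_video, n_frames_per_segment=8):
    """Sketch only: uniformly sample segments of 8 successive frame names per video."""
    n_segments = n_frames_per_video // n_frames_per_segment
    video_frames_dict = dict()
    with open(annot_text_path) as f:
        for row in csv.DictReader(f):
            video_id = row['id']
            n_frames = int(float(row['length']) * 24)  # assumption: 24 fps rgb frames
            starts = np.linspace(1, max(n_frames - n_frames_per_segment + 1, 1), n_segments, dtype=int)
            idx = np.hstack([np.arange(s, s + n_frames_per_segment) for s in starts])
            idx = np.clip(idx, 1, n_frames)
            video_frames_dict[video_id] = np.array(['%s-%06d.jpg' % (video_id, i) for i in idx])
    return video_frames_dict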
def _14_prepare_annotation_frames_per_video_dict_untrimmed_multi_label_for_resnet_ordered():
    """
    Get the list of frames for each video: a fixed number of ordered frames
    (here 32) is sampled per video. These frames will be used to extract
    features for each video.
    """
    # if the required frames per video are 128, there are 51/6 out of 7986/1864 videos in the training/testing splits that don't satisfy this
    n_frames_per_video = 32

    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (root_path)
    annotation_path = '%s/Charades/annotation/frames_dict_untrimmed_multi_label_resnet_ordered_%d_frames.pkl' % (root_path, n_frames_per_video)

    video_frames_dict_tr = __get_frame_names_untrimmed_from_csv_file_for_ordered(annot_tr_text_path, n_frames_per_video, is_resnet=True)
    video_frames_dict_te = __get_frame_names_untrimmed_from_csv_file_for_ordered(annot_te_text_path, n_frames_per_video, is_resnet=True)
    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te), annotation_path, is_highest=True)
def _02_prepare_annotation_frame_dict(is_training=True):
    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (root_path)
    annotation_pkl_tr_path = '%s/Charades/annotation/frames_dict_tr.pkl' % (root_path)
    annotation_pkl_te_path = '%s/Charades/annotation/frames_dict_te.pkl' % (root_path)

    annot_text_path = annot_tr_text_path if is_training else annot_te_text_path
    annotation_pkl_path = annotation_pkl_tr_path if is_training else annotation_pkl_te_path

    annotation_dict = {}
    n_actions = N_CLASSES
    frames_per_instance = []

    # add an empty list for each action in the annotation dictionary
    for idx_action in range(n_actions):
        action_num = idx_action + 1  # action indices are stored 1-based
        annotation_dict[action_num] = []

    # get the frame paths for every action instance
    with open(annot_text_path) as f:
        reader = csv.DictReader(f)  # each row is read as a dict
        for row in reader:
            # action_strings looks like: 'c092 11.90 21.20;c147 0.00 12.60'
            action_strings = row['actions']  # actions with their durations
            action_strings_splits = action_strings.split(';')
            video_id = row['id']  # Charades videos are identified by id
            if len(action_strings) == 0:
                print('... no action for video %s' % (video_id))
                continue
            for action_st in action_strings_splits:
                action_splits = action_st.split(' ')
                action_idx = int(action_splits[0][1:])  # which action
                action_num = action_idx + 1
                action_start = action_splits[1]
                action_end = action_splits[2]

                # add frames: 1. get the paths of the frames inside the given action interval
                frames_relative_path = __get_frames_relative_pathes_in_given_duration(video_id, action_start, action_end)
                annotation_dict[action_num].append(frames_relative_path)

                # accumulate counter: how long (in frames) each action instance lasts
                n_frames_per_instance = len(frames_relative_path)
                frames_per_instance.append(n_frames_per_instance)

    # save annotation
    utils.pkl_dump(annotation_dict, annotation_pkl_path, is_highest=True)

    print(frames_per_instance)
    print(len(frames_per_instance))
    print(np.sum(frames_per_instance))
    print(np.average(frames_per_instance))
def _703_prepare_data_splits():
    """
    Prepare the train/test split of video names. For each person, 80% of the
    videos go to training and the rest to testing.
    """
    annot_dict_path = Pth('EPIC-Kitchens/annotations/EPIC_train_action_labels_dict.pkl')
    annot_idxes_many_shots_path = Pth('EPIC-Kitchens/annotations/annot_idxes_many_shots_noun_verb.pkl')
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')

    annot_idxes_many_shots = utils.pkl_load(annot_idxes_many_shots_path)
    annot_dict = utils.pkl_load(annot_dict_path)

    split_ratio = 0.8
    person_videos_dict = {}

    # first loop to collect all unique video ids per person
    for annot_id in annot_idxes_many_shots:
        annot_line = annot_dict[annot_id]
        person_id = annot_line[0]
        video_id = annot_line[1]
        if person_id not in person_videos_dict:
            person_videos_dict[person_id] = []
        person_videos_dict[person_id].append(video_id)

    for person_id in person_videos_dict:
        video_names = natsort.natsorted(np.unique(person_videos_dict[person_id]))
        person_videos_dict[person_id] = video_names

    # now that we have collected the persons and their videos, split the videos of each person
    video_names_tr = []
    video_names_te = []
    for person_id in person_videos_dict:
        v_names = person_videos_dict[person_id]
        idx = int(len(v_names) * split_ratio)
        v_names_tr = v_names[:idx]
        v_names_te = v_names[idx:]
        video_names_tr += v_names_tr
        video_names_te += v_names_te

    video_names_tr = np.array(video_names_tr)
    video_names_te = np.array(video_names_te)

    print(len(video_names_tr) + len(video_names_te))
    print(len(video_names_tr))
    print(len(video_names_te))

    # save video names
    utils.pkl_dump((video_names_tr, video_names_te), video_names_splits_path)
def _06_prepare_video_annotation_multi_label():
    root_path = '.'
    video_annotation_path = '%s/Charades/annotation/video_annotation.pkl' % (root_path)
    video_annotation_multi_label_path = '%s/Charades/annotation/video_annotation_multi_label.pkl' % (root_path)

    (video_id_tr, y_tr, video_id_te, y_te) = utils.pkl_load(video_annotation_path)

    video_ids_tr = np.unique(video_id_tr)
    video_ids_te = np.unique(video_id_te)

    n_tr = len(video_ids_tr)
    n_te = len(video_ids_te)
    n_classes = N_CLASSES

    video_gt_dict_tr = dict()
    video_gt_dict_te = dict()

    for id in video_ids_tr:
        video_gt_dict_tr[id] = []
    for id in video_ids_te:
        video_gt_dict_te[id] = []

    # zip() pairs up the elements of its iterable arguments; here it pairs each
    # (video_id, label) so that the labels can be grouped per video
    for i, j in zip(video_id_tr, y_tr):
        video_gt_dict_tr[i].append(j)
    for i, j in zip(video_id_te, y_te):
        video_gt_dict_te[i].append(j)

    # binarize labels of videos
    y_multi_label_tr = np.zeros((n_tr, n_classes), dtype=int)
    y_multi_label_te = np.zeros((n_te, n_classes), dtype=int)

    for idx_video, video_name in enumerate(video_ids_tr):
        idx_class = np.add(video_gt_dict_tr[video_name], -1)
        y_multi_label_tr[idx_video][idx_class] = 1

    for idx_video, video_name in enumerate(video_ids_te):
        idx_class = np.add(video_gt_dict_te[video_name], -1)
        y_multi_label_te[idx_video][idx_class] = 1

    data = (video_ids_tr, y_multi_label_tr, video_ids_te, y_multi_label_te)
    utils.pkl_dump(data, video_annotation_multi_label_path)
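# A tiny self-contained illustration of the binarization step above, with
# made-up labels and 5 classes:
import numpy as np

n_classes = 5
video_labels = [2, 4]                 # 1-based action ids of one video
y = np.zeros((1, n_classes), dtype=int)
idx_class = np.add(video_labels, -1)  # shift to 0-based: [1, 3]
y[0][idx_class] = 1
print(y)                              # [[0 1 0 1 0]]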
def __save_centroids(root_model, model_name, epoch_num):
    centroids_root_path = Pth('EPIC-Kitchens/node_features/%s', (model_name,))
    centroids_path = '%s/%03d.pkl' % (centroids_root_path, epoch_num)

    if not os.path.exists(centroids_root_path):
        os.mkdir(centroids_root_path)

    session = K.get_session()
    t_centroids = root_model.get_layer('node_embedding').output  # (1, 20, 1024)
    centroids_embedding = t_centroids.eval(session=session)  # (1, 20, 1024)
    centroids_embedding = np.squeeze(centroids_embedding, axis=0)  # (20, 1024)
    utils.pkl_dump(centroids_embedding, centroids_path)
def _103_prepare_video_info():
    video_info_path = Pth('Breakfast/annotation/video_info.pkl')
    annot_activities_path = Pth('Breakfast/annotation/annot_activities.pkl')
    (video_relative_pathes_tr, _, video_relative_pathes_te, _) = utils.pkl_load(annot_activities_path)

    video_relative_pathes = np.hstack((video_relative_pathes_tr, video_relative_pathes_te))
    n_videos = len(video_relative_pathes)

    video_info = dict()
    fps, n_frames, duration = [], [], []

    # loop on the videos
    for idx_video, video_relative_path in enumerate(video_relative_pathes):
        utils.print_counter(idx_video, n_videos, 100)
        video_path = Pth('Breakfast/videos/%s', (video_relative_path,))
        video_id = __video_relative_path_to_video_id(video_relative_path)
        try:
            v_fps, v_n_frames, v_duration = video_utils.get_video_info(video_path)
        except Exception:
            print(video_relative_path)
            continue
        fps.append(v_fps)
        n_frames.append(v_n_frames)
        duration.append(v_duration)
        video_info[video_id] = {'duration': v_duration, 'fps': v_fps, 'n_frames': v_n_frames}

    print(np.mean(fps), np.std(fps), np.min(fps), np.max(fps))
    print(np.mean(duration), np.std(duration), np.min(duration), np.max(duration))
    print(np.mean(n_frames), np.std(n_frames), np.min(n_frames), np.max(n_frames))

    # 15.0 0.0 15.0 15.0
    # 140.30865654205607 121.76493338896255 12.4 649.67
    # 2105.308995327103 1826.5189539717755 187 9746

    utils.pkl_dump(video_info, video_info_path)
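# `video_utils.get_video_info` is project code not shown here. A minimal
# OpenCV-based sketch of what it is assumed to return, namely
# (fps, n_frames, duration in seconds); the project's actual helper may differ.
import cv2

def get_video_info(video_path):
    """Sketch only: probe a video for fps, frame count, and duration."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError('cannot open video: %s' % video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    duration = n_frames / fps if fps > 0 else 0.0
    return fps, n_frames, duration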
def _01_prepare_annotation_class_names():
    root_path = c.data_root_path
    annot_text_path = '%s/Charades/annotation/Charades_v1_classes.txt' % (root_path)
    annot_pkl_path = '%s/Charades/annotation/class_names.pkl' % (root_path)

    class_names = utils.txt_load(annot_text_path)
    class_ids = [int(n[1:5]) for n in class_names]
    for i_1, i_2 in zip(class_ids, np.arange(N_CLASSES)):
        assert i_1 == i_2
    class_names = [n[5:] for n in class_names]
    class_names = np.array(class_names)
    utils.pkl_dump(class_names, annot_pkl_path, is_highest=True)
def _105_prepare_action_gt_timestamped():
    """
    Get the ground truth of unit-actions with their timestamps.
    :return:
    """
    root_path = c.DATA_ROOT_PATH

    video_ids_path = Pth('Breakfast/annotation/video_ids_split.pkl')
    unit_actions_path = Pth('Breakfast/annotation/unit_actions_list.pkl')
    gt_actions_path = Pth('Breakfast/annotation/gt_unit_actions_timestamped.pkl')

    (video_ids_tr, video_ids_te) = utils.pkl_load(video_ids_path)
    unit_actions = utils.pkl_load(unit_actions_path)

    video_pathes_tr = ['%s/Breakfast/videos/%s' % (root_path, __video_video_id_to_video_relative_path(id, False)) for id in video_ids_tr]
    video_pathes_te = ['%s/Breakfast/videos/%s' % (root_path, __video_video_id_to_video_relative_path(id, False)) for id in video_ids_te]

    gt_actions_te = __get_gt_actions_timestamped(video_pathes_te, unit_actions)
    gt_actions_tr = __get_gt_actions_timestamped(video_pathes_tr, unit_actions)

    gt_actions_tr = np.array(gt_actions_tr)
    gt_actions_te = np.array(gt_actions_te)

    l_tr = [len(i) for i in gt_actions_tr]
    l_te = [len(i) for i in gt_actions_te]
    print('mean, std, min, max for number of nodes in each video [tr/te]')
    print(np.mean(l_tr), np.std(l_tr), np.min(l_tr), np.max(l_tr))
    print(np.mean(l_te), np.std(l_te), np.min(l_te), np.max(l_te))
    print(gt_actions_tr.shape)
    print(gt_actions_te.shape)

    utils.pkl_dump(((video_ids_tr, gt_actions_tr), (video_ids_te, gt_actions_te)), gt_actions_path)
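# `__get_gt_actions_timestamped` is not shown in this section. A sketch under the
# assumption that each Breakfast video '<name>.avi' has a sibling '<name>.txt'
# segmentation file whose lines look like '1-30 SIL' (frame range + unit action);
# the returned node format (action_idx, frame_start, frame_end) is also an assumption.
import os

def __get_gt_actions_timestamped(video_pathes, unit_actions):
    """Sketch only: per video, a list of (action_idx, frame_start, frame_end)."""
    unit_actions = list(unit_actions)
    gt_actions = []
    for video_path in video_pathes:
        annot_path = os.path.splitext(video_path)[0] + '.txt'
        video_gt = []
        with open(annot_path) as f:
            for line in f:
                span, action_name = line.strip().split()
                frame_start, frame_end = (int(v) for v in span.split('-'))
                video_gt.append((unit_actions.index(action_name), frame_start, frame_end))
        gt_actions.append(video_gt)
    return gt_actions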
def _501_generate_centroids(n_centroids, n_dims):
    c1_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    c2_path = Pth('Breakfast/features_centroids/features_sobol_%d_centroids.pkl', (n_centroids,))

    # centroids as random vectors
    c1 = np.random.rand(n_centroids, n_dims)

    # centroids as sobol sequence
    c2 = sobol.sobol_generate(n_dims, n_centroids)
    c2 = np.array(c2)

    # save centroids
    utils.pkl_dump(c1, c1_path)
    utils.pkl_dump(c2, c2_path)
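# The `sobol` module used above is a project dependency. If it is unavailable, an
# equivalent low-discrepancy sequence can be drawn with SciPy's quasi-Monte Carlo
# API (scipy >= 1.7), as sketched below; this is an alternative, not the repo's code.
import numpy as np
from scipy.stats import qmc

def generate_sobol_centroids(n_centroids, n_dims, seed=0):
    """Alternative to sobol.sobol_generate: Sobol points in [0, 1)^n_dims."""
    sampler = qmc.Sobol(d=n_dims, scramble=False, seed=seed)
    return np.asarray(sampler.random(n_centroids))  # (n_centroids, n_dims)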
def _602_generate_nodes(n_nodes, n_dims):
    n1_path = Pth('EPIC-Kitchens/features_centroid/features_random_%d.pkl', (n_nodes,))
    n2_path = Pth('EPIC-Kitchens/features_centroid/features_sobol_%d.pkl', (n_nodes,))

    # nodes as random vectors
    n1 = np.random.rand(n_nodes, n_dims)

    # nodes as sobol sequence
    n2 = sobol.sobol_generate(n_dims, n_nodes)
    n2 = np.array(n2)

    print(n1.shape)
    print(n2.shape)

    # save nodes
    utils.pkl_dump(n1, n1_path)
    utils.pkl_dump(n2, n2_path)
def _202_spit_video_frames_relative_pathes():
    video_names_splits_path = Pth('EPIC-Kitchens/annotation/video_names_splits.pkl')
    frame_relative_pathes_dict_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict.pkl')
    frame_relative_pathes_dict_tr_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict_tr.pkl')
    frame_relative_pathes_dict_te_path = Pth('EPIC-Kitchens/annotation/frame_relative_pathes_dict_te.pkl')

    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frames_dict = utils.pkl_load(frame_relative_pathes_dict_path)

    dict_tr = dict()
    dict_te = dict()

    for v_name in video_names_tr:
        dict_tr[v_name] = frames_dict[v_name]
    for v_name in video_names_te:
        dict_te[v_name] = frames_dict[v_name]

    utils.pkl_dump(dict_tr, frame_relative_pathes_dict_tr_path)
    utils.pkl_dump(dict_te, frame_relative_pathes_dict_te_path)
def _01_get_nodes_over_epochs():
    """
    Get centroids of the model.
    :return:
    """
    n_centroids = 128
    n_epochs = 100
    model_name = 'classifier_19.02.21-01:00:30'
    model_root_path = Pth('Breakfast/models/%s', (model_name,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    nodes_root_path = Pth('Breakfast/qualitative_results/node_embedding_%s' % (model_name,))

    v_input_nodes = utils.pkl_load(centroids_path)

    model = None
    t_input_nodes = None
    t_node_embedding = None
    keras_session = K.get_session()

    for idx_epoch in range(n_epochs):
        utils.print_counter(idx_epoch, n_epochs)

        epoch_num = idx_epoch + 1
        weight_path = '%s/%03d.pkl' % (model_root_path, epoch_num)

        if epoch_num == 1:
            model = __load_model(model_name, epoch_num)
            t_input_nodes = model.get_layer('input_n').input
            t_node_embedding = model.get_layer('node_embedding').output
        else:
            model.load_weights(weight_path)

        v_node_embedding, = keras_session.run([t_node_embedding], {t_input_nodes: v_input_nodes})  # (1, 128, 1024)
        v_node_embedding = np.squeeze(v_node_embedding, axis=0)  # (128, 1024)
        path = '%s/%02d.pkl' % (nodes_root_path, epoch_num)
        utils.pkl_dump(v_node_embedding, path)
def _04_get_activation_values():
    # load data
    n_timesteps = 64
    n_centroids = 128
    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    attention_values_path = Pth('Breakfast/qualitative_results/node_attention_%s.pkl', (model_name,))

    v_input_n = utils.pkl_load(centroids_path)
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    epoch_num = 133
    model = __load_model(model_name, epoch_num)
    t_input_n = model.get_layer('input_n').input
    t_input_x = model.get_layer('input_x').input
    t_node_attention = model.get_layer('node_attention').output  # (None, 1, 1, 64, 128)
    keras_session = K.get_session()

    batch_size = 40
    att_tr = __get_tensor_values(batch_size, keras_session, t_node_attention, t_input_n, t_input_x, v_input_n, x_tr)  # (None, 1, 1, 64, 128)
    att_te = __get_tensor_values(batch_size, keras_session, t_node_attention, t_input_n, t_input_x, v_input_n, x_te)  # (None, 1, 1, 64, 128)

    att_tr = np.squeeze(att_tr, axis=1)  # (None, 1, 64, 128)
    att_tr = np.squeeze(att_tr, axis=1)  # (None, 64, 128)
    att_te = np.squeeze(att_te, axis=1)  # (None, 1, 64, 128)
    att_te = np.squeeze(att_te, axis=1)  # (None, 64, 128)

    print('finally')
    print(x_tr.shape)
    print(x_te.shape)
    print(att_tr.shape)
    print(att_te.shape)

    utils.pkl_dump((att_tr, att_te), attention_values_path)
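# `__get_tensor_values` (also used by `_06_get_graph_edges` below) is project code
# not shown here. A minimal sketch consistent with its call sites, evaluating a
# tensor batch by batch in the given Keras session; the body is an assumption.
import numpy as np

def __get_tensor_values(batch_size, keras_session, t_output, t_input_n, t_input_x, v_input_n, x):
    """Sketch only: run t_output over x in batches and stack the results.
    v_input_n (the centroids) is fed unchanged with every batch."""
    values = []
    n_samples = len(x)
    for idx_start in range(0, n_samples, batch_size):
        x_batch = x[idx_start:idx_start + batch_size]
        v_batch, = keras_session.run([t_output], {t_input_n: v_input_n, t_input_x: x_batch})
        values.append(v_batch)
    return np.concatenate(values, axis=0)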
def _204_sample_frames_non_local():
    """
    Uniformly sample sequences of frames from each video. Each sequence consists
    of 8 successive frames.
    """
    n_frames_per_video = 512
    model_type = 'non_local'
    annot_activities_path = Pth('Breakfast/annotation/annot_activities.pkl')
    frames_annot_path = Pth('Breakfast/annotation/annot_frames_non_local_%d.pkl', (n_frames_per_video,))

    (video_relative_pathes_tr, _, video_relative_pathes_te, _) = utils.pkl_load(annot_activities_path)

    video_frames_dict_tr = __sample_frames(video_relative_pathes_tr, n_frames_per_video, model_type)
    video_frames_dict_te = __sample_frames(video_relative_pathes_te, n_frames_per_video, model_type)
    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te), frames_annot_path)
def _106_prepare_action_graph_vector():
    """
    Each video is labeled with a set of actions; we construct a graph from these
    actions. A node represents one action, and a link is drawn between two nodes
    only if the two actions are neighbours in the video.
    :return:
    """
    gt_actions_path = Pth('Breakfast/annotation/gt_unit_actions_timestamped.pkl')
    action_graph_vectors_path = Pth('Breakfast/annotation/action_graph_vectors.pkl')
    action_graph_matrices_path = Pth('Breakfast/annotation/action_graph_matrices.pkl')

    (video_ids_tr, gt_actions_tr), (video_ids_te, gt_actions_te) = utils.pkl_load(gt_actions_path)

    graph_matrices_tr = __get_action_graph_matrices(video_ids_tr, gt_actions_tr)
    graph_matrices_te = __get_action_graph_matrices(video_ids_te, gt_actions_te)
    graph_vectors_tr = __get_action_graph_vectors(video_ids_tr, gt_actions_tr)
    graph_vectors_te = __get_action_graph_vectors(video_ids_te, gt_actions_te)

    print(graph_matrices_tr.shape)
    print(graph_matrices_te.shape)
    print(graph_vectors_tr.shape)
    print(graph_vectors_te.shape)

    # save the graph data
    utils.pkl_dump((graph_matrices_tr, graph_matrices_te), action_graph_matrices_path)
    utils.pkl_dump((graph_vectors_tr, graph_vectors_te), action_graph_vectors_path)
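# `__get_action_graph_matrices` is not shown in this section. A sketch of the
# adjacency construction the docstring describes: an undirected edge between
# every pair of temporally neighbouring actions. The (action_idx, start, end)
# node format and n_actions=48 (the Breakfast unit-action count) are assumptions.
import numpy as np

def __get_action_graph_matrices(video_ids, gt_actions, n_actions=48):
    """Sketch only: one symmetric adjacency matrix per video."""
    matrices = np.zeros((len(video_ids), n_actions, n_actions), dtype=np.float32)
    for idx_video, video_nodes in enumerate(gt_actions):
        # link every pair of consecutive actions in the video
        for node_1, node_2 in zip(video_nodes[:-1], video_nodes[1:]):
            a1, a2 = node_1[0], node_2[0]
            matrices[idx_video, a1, a2] = 1.0
            matrices[idx_video, a2, a1] = 1.0
    return matrices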
def _203_sample_frames_resnet():
    """
    Get the list of frames for each video: a fixed number of frames (here 64)
    is sampled per video. These frames will be used to extract features for
    each video.
    """
    # if the required frames per video are 128, there are 51/6 out of 7986/1864 videos in the training/testing splits that don't satisfy this
    n_frames_per_video = 64
    model_type = 'resnet'
    annot_activities_path = Pth('Breakfast/annotation/annot_activities.pkl')
    frames_annot_path = Pth('Breakfast/annotation/annot_frames_resnet_%d.pkl', (n_frames_per_video,))

    (video_relative_pathes_tr, _, video_relative_pathes_te, _) = utils.pkl_load(annot_activities_path)

    video_frames_dict_tr = __sample_frames(video_relative_pathes_tr, n_frames_per_video, model_type)
    video_frames_dict_te = __sample_frames(video_relative_pathes_te, n_frames_per_video, model_type)
    utils.pkl_dump((video_frames_dict_tr, video_frames_dict_te), frames_annot_path)
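# `__sample_frames` (shared by _203 and _204 above) is not shown in this section.
# A sketch under the assumption that 'resnet' means uniformly spaced single frames
# while other model types mean uniformly spaced segments of 8 successive frames;
# `__get_n_frames_of_video` is a hypothetical helper, not the repo's code.
import numpy as np

def __sample_frames(video_relative_pathes, n_frames_per_video, model_type):
    """Sketch only: video relative path -> sampled frame indices (1-based)."""
    video_frames_dict = dict()
    for video_relative_path in video_relative_pathes:
        n_frames = __get_n_frames_of_video(video_relative_path)  # hypothetical helper
        if model_type == 'resnet':
            # uniformly spaced single frames
            idx = np.linspace(1, n_frames, n_frames_per_video, dtype=int)
        else:
            # uniformly spaced segments of 8 successive frames
            n_segments = n_frames_per_video // 8
            starts = np.linspace(1, max(n_frames - 7, 1), n_segments, dtype=int)
            idx = np.hstack([np.arange(s, s + 8) for s in starts])
            idx = np.clip(idx, 1, n_frames)
        video_frames_dict[video_relative_path] = idx
    return video_frames_dict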
def _704_prepare_many_shots_noun_verb_action_ids():
    """
    Prepare two dicts of nouns and verbs to convert from id to many_shot id.
    All ids are zero-indexed.
    71 noun classes
    26 verb classes
    xx actions
    :return:
    """
    annot_dict_path = Pth('EPIC-Kitchens/annotations/EPIC_train_action_labels_dict.pkl')
    annot_idxes_many_shots_path = Pth('EPIC-Kitchens/annotations/annot_idxes_many_shots_noun_verb.pkl')
    noun_ids_many_shots_dict_path = Pth('EPIC-Kitchens/annotations/noun_ids_many_shots_dict.pkl')
    verb_ids_many_shots_dict_path = Pth('EPIC-Kitchens/annotations/verb_ids_many_shots_dict.pkl')
    actn_ids_many_shots_dict_path = Pth('EPIC-Kitchens/annotations/actn_ids_many_shots_dict.pkl')
    actn_ids_many_shots_list_path = Pth('EPIC-Kitchens/annotations/EPIC_many_shot_actions.csv')

    annot_idxes_many_shots = utils.pkl_load(annot_idxes_many_shots_path)
    annot_dict = utils.pkl_load(annot_dict_path)

    # get all noun_ids, verb_ids and action_ids
    noun_ids = [annot_dict[annot_id][10] for annot_id in annot_idxes_many_shots]
    verb_ids = [annot_dict[annot_id][8] for annot_id in annot_idxes_many_shots]
    actn_ids = __get_action_ids_from_annotation(actn_ids_many_shots_list_path)

    noun_ids = np.sort(np.unique(noun_ids))
    verb_ids = np.sort(np.unique(verb_ids))

    n_nouns = len(noun_ids)
    n_verbs = len(verb_ids)
    n_actns = len(actn_ids)

    # these dictionaries give the many_shot id of a given original noun, verb or action id
    many_shot_noun_ids_dict = dict(zip(noun_ids, np.arange(n_nouns)))
    many_shot_verb_ids_dict = dict(zip(verb_ids, np.arange(n_verbs)))
    many_shot_actn_ids_dict = dict(zip(actn_ids, np.arange(n_actns)))

    utils.pkl_dump(many_shot_noun_ids_dict, noun_ids_many_shots_dict_path)
    utils.pkl_dump(many_shot_verb_ids_dict, verb_ids_many_shots_dict_path)
    utils.pkl_dump(many_shot_actn_ids_dict, actn_ids_many_shots_dict_path)
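# A toy illustration of the id remapping these dicts implement (made-up ids):
import numpy as np

noun_ids = np.sort(np.unique([3, 17, 42]))  # made-up original ids of many-shot nouns
many_shot_noun_ids_dict = dict(zip(noun_ids, np.arange(len(noun_ids))))
print(many_shot_noun_ids_dict)      # {3: 0, 17: 1, 42: 2}
print(many_shot_noun_ids_dict[17])  # original id 17 -> many-shot id 1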
def extract_features_i3d_charades(n_frames_in, n_frames_out):
    """
    Extract features from the i3d model.
    n_frames_in = 8 * n_frames_out
    n_frames_in = 1024, 512, 256
    n_frames_out = 128, 64, 32
    """
    n_splits_per_video = 2
    root_path = '../data'
    root_Charades_path = '/home/r/renpengzhen/Datasets/Charades'
    frames_annot_path = '%s/Charades/annotation/frames_dict_untrimmed_multi_label_i3d_%d_frames.pkl' % (root_path, n_frames_in)  # paths of the sampled frames
    model_path = '%s/Charades/baseline_models/i3d/rgb_charades.pt' % (root_path)  # where the model is stored
    frames_root_path = '%s/Charades_v1_rgb' % (root_Charades_path)  # where all the video frames are stored
    features_root_path = '%s/Charades/features_i3d_pytorch_charades_rgb_mixed_5c_%df' % (root_path, n_frames_out)  # where the i3d features will be stored

    # load the dict of sampled frames for both splits: video name -> list of frame names,
    # e.g. ('AXIW1', array(['AXIW1-000001.jpg', 'AXIW1-000002.jpg', ...], dtype='<U16'))
    (video_frames_dict_tr, video_frames_dict_te) = utils.pkl_load(frames_annot_path)
    video_frames_dict = dict()
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    video_names = list(video_frames_dict.keys())  # names of the videos
    n_videos = len(video_names)  # total number of videos
    del video_frames_dict_tr
    del video_frames_dict_te

    n_threads = 8  # number of reader threads
    n_frames_per_segment = 8  # frames per segment; these 8 frames are successive, sampled as such
    assert n_frames_per_segment * n_frames_out == n_frames_in

    if not os.path.exists(features_root_path):
        os.makedirs(features_root_path)

    t1 = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # reader for getting video frames
    video_reader_tr = image_utils.AsyncVideoReaderCharadesForI3DTorchModel(n_threads=n_threads)

    # async reader: load the images of the first video (an np array of frame paths)
    video_group_frames = __get_video_frame_pathes(video_names[0], frames_root_path, video_frames_dict)
    video_reader_tr.load_video_frames_in_batch(video_group_frames)

    # load the model, do one forward pass, and print the model's input/output details
    model = i3d_torch_charades_utils.load_model_i3d_charades_rgb_for_testing(model_path)
    print('input_size=(3, 8, 224, 224)')
    print(torchsummary.summary(model, input_size=(3, 8, 224, 224)))

    # loop on the list of videos of the whole dataset
    for idx_video in range(n_videos):
        video_num = idx_video + 1
        video_name = video_names[idx_video]
        begin_num = 0
        end_num = n_videos
        if begin_num is not None and end_num is not None:
            if video_num <= begin_num or video_num > end_num:
                continue

        # wait until the image batch is loaded
        t1 = time.time()
        while video_reader_tr.is_busy():
            time.sleep(0.1)
        t2 = time.time()
        duration_waited = t2 - t1
        print('... video %04d, %04d, waited: %.02f' % (video_num, n_videos, duration_waited))

        # get the (cropped) frames of the current video
        frames = video_reader_tr.get_images()  # (G*T*N, 224, 224, 3)

        # pre-load for the next video group, taking into account the number of instances
        if video_num < n_videos:
            next_video_frames = __get_video_frame_pathes(video_names[idx_video + 1], frames_root_path, video_frames_dict)
            video_reader_tr.load_video_frames_in_batch(next_video_frames)

        if len(frames) != n_frames_in:
            raise ValueError('... ... wrong n frames: %s' % (video_name))

        # reshape so one dimension carries the frames per segment while the other represents the batch size
        frames = np.reshape(frames, (n_frames_out, n_frames_per_segment, 224, 224, 3))  # (T, 8, 224, 224, 3), T is the number of segments ("super-frames")

        # transpose to have channel_first: (T, 8, 224, 224, 3) => (T, 3, 8, 224, 224)
        frames = np.transpose(frames, (0, 4, 1, 2, 3))

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(frames).cuda()  # (T, 3, 8, 224, 224), T = 128, 64 or 32
            output_var = model(input_var)  # extract features, torch.Size([T, 1024, 1, 7, 7])
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (T, 1024, 1, 7, 7)
            # don't forget to clean up variables
            del input_var
            del output_var

        # transpose to have channel_last
        features = np.transpose(features, (0, 2, 3, 4, 1))  # (T, 1, 7, 7, 1024)

        # squeeze out the singleton dimension
        features = np.squeeze(features, axis=1)  # (T, 7, 7, 1024), T = 128, 64 or 32

        # path to save the features
        video_features_path = '%s/%s.pkl' % (features_root_path, video_name)
        if os.path.exists(video_features_path):
            print('... features for video already exist: %s.pkl' % (video_name))
            continue

        # save features
        utils.pkl_dump(features, video_features_path, is_highest=True)

    t2 = time.time()
    print('... finish extracting features in %d seconds' % (t2 - t1))
def _03_prepare_annotation_frame_list():
    """
    Convert the annotation dict to a list. Also, create a list for the ground truth.
    """
    n_frames_per_sample = 20
    n_classes = N_CLASSES

    root_path = c.data_root_path
    annotation_dict_tr_path = '%s/Charades/annotation/frames_dict_tr.pkl' % (root_path)
    annotation_dict_te_path = '%s/Charades/annotation/frames_dict_te.pkl' % (root_path)
    annotation_list_path = '%s/Charades/annotation/frames_list_%d_frames.pkl' % (root_path, n_frames_per_sample)

    annotation_dict_tr = utils.pkl_load(annotation_dict_tr_path)
    annotation_dict_te = utils.pkl_load(annotation_dict_te_path)

    x_tr = []
    x_te = []
    y_tr = []
    y_te = []

    class_nums = range(1, n_classes + 1)
    for class_num in class_nums:
        print('... %d/%d' % (class_num, n_classes))
        class_annot_tr = annotation_dict_tr[class_num]
        class_annot_te = annotation_dict_te[class_num]
        for sample_tr in class_annot_tr:
            n_f = len(sample_tr)
            if n_f == 0:
                print('zero frames in tr sample')
                continue
            if n_f < n_frames_per_sample:
                # too few frames: sample with replacement
                idx = np.random.randint(low=0, high=n_f, size=(n_frames_per_sample,))
            else:
                # enough frames: sample without replacement
                idx = np.random.choice(n_f, n_frames_per_sample, replace=False)
            sample_frame_pathes = np.array(sample_tr)[idx]
            x_tr.append(sample_frame_pathes)
            y_tr.append(class_num)
        for sample_te in class_annot_te:
            n_f = len(sample_te)
            if n_f == 0:
                print('zero frames in te sample')
                continue
            if n_f < n_frames_per_sample:
                idx = np.random.randint(low=0, high=n_f, size=(n_frames_per_sample,))
            else:
                idx = np.random.choice(n_f, n_frames_per_sample, replace=False)
            sample_frame_pathes = np.array(sample_te)[idx]
            x_te.append(sample_frame_pathes)
            y_te.append(class_num)

    x_tr = np.array(x_tr)
    x_te = np.array(x_te)
    y_tr = np.array(y_tr)
    y_te = np.array(y_te)

    print(x_tr.shape)
    print(y_tr.shape)
    print(x_te.shape)
    print(y_te.shape)

    data = (x_tr, y_tr, x_te, y_te)
    utils.pkl_dump(data, annotation_list_path, is_highest=True)
def extract_features_i3d_charades(begin_num=None, end_num=None):
    """
    Extract features from the i3d model.
    """
    n_frames_in = 1024
    n_frames_out = 128
    n_splits_per_video = 2
    root_path = '/content/'
    frames_annot_path = '%s/charades/annotation/frames_dict_untrimmed_multi_label_i3d_%d_frames.pkl' % (root_path, n_frames_in)
    model_path = '%s/charades/baseline_models/i3d/rgb_charades.pt' % (root_path)
    frames_root_path = '%s/charades/frames/Charades_v1_rgb' % (root_path)
    features_root_path = '/local-ssd/nhussein/Charades/features_i3d_charades_rgb_mixed_5c_untrimmed_%d_frames' % (n_frames_out)

    (video_frames_dict_tr, video_frames_dict_te) = utils.pkl_load(frames_annot_path)
    video_frames_dict = dict()
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    video_names = list(video_frames_dict.keys())
    n_videos = len(video_names)
    del video_frames_dict_tr
    del video_frames_dict_te

    n_threads = 8
    n_frames_per_segment = 8
    assert n_frames_per_segment * n_frames_out == n_frames_in

    if not os.path.exists(features_root_path):
        print('Sorry, path does not exist: %s' % (features_root_path))
        return

    t1 = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # reader for getting video frames
    video_reader_tr = image_utils.AsyncVideoReaderCharadesForI3DTorchModel(n_threads=n_threads)

    # async reader: load the images of the first video; we will read the first group of videos
    video_group_frames = __get_video_frame_pathes(video_names[0], frames_root_path, video_frames_dict)
    video_reader_tr.load_video_frames_in_batch(video_group_frames)

    # load the model
    model = i3d_factory.load_model_i3d_charades_rgb_for_testing(model_path)
    print(torchsummary.summary(model, input_size=(3, 8, 224, 224)))

    # loop on the list of videos
    for idx_video in range(n_videos):
        video_num = idx_video + 1
        video_name = video_names[idx_video]

        if begin_num is not None and end_num is not None:
            if video_num <= begin_num or video_num > end_num:
                continue

        # wait until the image batch is loaded
        t1 = time.time()
        while video_reader_tr.is_busy():
            time.sleep(0.1)
        t2 = time.time()
        duration_waited = t2 - t1
        print('... video %04d, %04d, waited: %.02f' % (video_num, n_videos, duration_waited))

        # get the frames
        frames = video_reader_tr.get_images()  # (G*T*N, 224, 224, 3)

        # pre-load for the next video group, taking into account the number of instances
        if video_num < n_videos:
            next_video_frames = __get_video_frame_pathes(video_names[idx_video + 1], frames_root_path, video_frames_dict)
            video_reader_tr.load_video_frames_in_batch(next_video_frames)

        if len(frames) != n_frames_in:
            raise ValueError('... ... wrong n frames: %s' % (video_name))

        # reshape so one dimension carries the frames per segment while the other represents the batch size
        frames = np.reshape(frames, (n_frames_out, n_frames_per_segment, 224, 224, 3))  # (T, 8, 224, 224, 3)

        # transpose to have channel_first: (T, 8, 224, 224, 3) => (T, 3, 8, 224, 224)
        frames = np.transpose(frames, (0, 4, 1, 2, 3))

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(frames).cuda()
            output_var = model(input_var)
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (T, 1024, 1, 7, 7)
            # don't forget to clean up variables
            del input_var
            del output_var

        # transpose to have channel_last
        features = np.transpose(features, (0, 2, 3, 4, 1))  # (T, 1, 7, 7, 1024)

        # squeeze out the singleton dimension
        features = np.squeeze(features, axis=1)  # (T, 7, 7, 1024)

        # path to save the features
        video_features_path = '%s/%s.pkl' % (features_root_path, video_name)
        # if os.path.exists(video_features_path):
        #     print('... features for video already exist: %s.pkl' % (video_name))
        #     continue

        # save features
        utils.pkl_dump(features, video_features_path, is_highest=True)

    t2 = time.time()
    print('... finish extracting features in %d seconds' % (t2 - t1))
def __extract_features_rgb(begin_num=None, end_num=None):
    root_path = c.DATA_ROOT_PATH  # './data'
    # this file is generated by charades.py
    annotation_path = '%s/Charades/annotation/frames_dict_trimmed_multi_label_i3d_160_frames.pkl' % (root_path)  # charades annotation path
    features_root_path = '%s/Charades/features_i3d_charades_rgb_mixed_5c_trimmed_20_frames' % (root_path)  # where the features are saved
    video_frames_root_path = '%s/Charades/frames/Charades_v1_rgb' % (root_path)  # path of the video frames
    model_path = '%s/Charades/baseline_models/i3d/rgb_charades.pt' % (root_path)  # path of the pre-trained model
    feature_name = 'Mixed_5c'  # which layer's features to save

    # 1. get the video annotation
    (video_frames_dict_tr, video_frames_dict_te) = utils.pkl_load(annotation_path)
    video_frames_dict = dict()
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    video_names = list(video_frames_dict.keys())
    n_videos = len(video_names)
    frame_count = 0

    if not os.path.exists(features_root_path):
        print('Sorry, path does not exist: %s' % (features_root_path))
        return

    t1 = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # async reader: load the images of the first video
    # NOTE (from the original annotator): the reader below is not written here
    img_reader = image_utils.AsyncImageReaderCharadesForI3DTorchModel(n_threads=20)  # loads the images
    img_reader.load_imgs_in_batch(__get_video_frame_pathes(video_names[0], video_frames_root_path, video_frames_dict))

    # load the model
    model = __load_i3d_model_rgb(model_path)
    torchsummary.summary(model, input_size=(3, 160, 224, 224))

    # loop on the list of videos
    for idx_video in range(n_videos):
        video_num = idx_video + 1

        if begin_num is not None and end_num is not None:
            if video_num <= begin_num or video_num > end_num:
                continue

        video_name = video_names[idx_video]

        # wait until the image batch is loaded
        t1 = time.time()
        while img_reader.is_busy():  # is_busy stays True until the images requested by load_imgs_in_batch are fully loaded
            time.sleep(0.1)
        t2 = time.time()
        duration_waited = t2 - t1
        print('...... video %d/%d: %s, waited: %d' % (video_num, n_videos, video_name, duration_waited))

        # get the video frames
        video_frames = img_reader.get_images()

        # pre-load for the next video
        if video_num < n_videos:
            next_video_name = video_names[idx_video + 1]
            img_reader.load_imgs_in_batch(__get_video_frame_pathes(next_video_name, video_frames_root_path, video_frames_dict))

        video_features_path = '%s/%s.pkl' % (features_root_path, video_name)
        # if os.path.exists(video_features_path):
        #     print('... features for video already exist: %s.pkl' % (video_name))
        #     continue

        # the charades videos here have a fixed number of 160 frames
        if len(video_frames) != 160:
            print('... wrong n frames: %d' % (video_num))
            continue

        # transpose to have channel_first: (160, 224, 224, 3) => (3, 160, 224, 224)
        video_frames = np.transpose(video_frames, (3, 0, 1, 2))

        # add one dimension to represent the batch size
        video_frames = np.expand_dims(video_frames, axis=0)  # (N, C, L, H, W)

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(video_frames).cuda()  # move the video to the gpu
            output_var = model(input_var)
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (1, 1024, 20, 7, 7)
            # these two variables must be cleaned up after each video, otherwise memory errors follow
            del input_var
            del output_var

        # squeeze to remove the batch_size dimension
        features = features[0]  # (1024, 20, 7, 7)

        # transpose to have channel_last
        features = np.transpose(features, (1, 2, 3, 0))  # (20, 7, 7, 1024) = (T, H, W, C); not needed if the downstream code is also PyTorch

        # path to save the features
        utils.pkl_dump(features, video_features_path, is_highest=True)  # save the features

        # increment counts
        frame_count += len(video_frames)

    t2 = time.time()
    print('finish extracting %d features in %d seconds' % (frame_count, t2 - t1))
    print('end time: %s' % utils.timestamp())
def _06_get_graph_edges():
    # load data
    n_timesteps = 64
    n_centroids = 128
    is_max_layer = True
    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))

    if is_max_layer:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_%s.h5', (model_name,))
        edge_pooled_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_reduced_%s.pkl', (model_name,))
        layer_name = 'pool_t_1'
        n_timesteps = 21
        n_nodes = 10
    else:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_%s.h5', (model_name,))
        edge_pooled_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_reduced_%s.pkl', (model_name,))
        layer_name = 'leaky_re_lu_3'
        n_timesteps = 64
        n_nodes = 32

    v_input_n = utils.pkl_load(centroids_path)
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    epoch_num = 133
    batch_size = 40
    model = __load_model(model_name, epoch_num)
    t_input_n = model.get_layer('input_n').input
    t_input_x = model.get_layer('input_x').input
    t_activations = model.get_layer(layer_name).output  # (None * 64, 32, 1, 1, 1024)
    keras_session = K.get_session()

    # 1357 train, 355 test
    vals_tr = __get_tensor_values(batch_size, keras_session, t_activations, t_input_n, t_input_x, v_input_n, x_tr)  # (None*64, 32, 1, 1, 1024)
    vals_te = __get_tensor_values(batch_size, keras_session, t_activations, t_input_n, t_input_x, v_input_n, x_te)  # (None*64, 32, 1, 1, 1024)

    vals_tr = np.squeeze(vals_tr, axis=2)
    vals_tr = np.squeeze(vals_tr, axis=2)
    vals_te = np.squeeze(vals_te, axis=2)
    vals_te = np.squeeze(vals_te, axis=2)

    n_tr = 1357
    n_te = 355

    if is_max_layer:
        vals_tr = np.reshape(vals_tr, (n_tr, n_nodes, n_timesteps, 1024))  # (None, nodes, timesteps, feat_size), (1357, 10, 21, 1024)
        vals_te = np.reshape(vals_te, (n_te, n_nodes, n_timesteps, 1024))  # (None, nodes, timesteps, feat_size), (355, 10, 21, 1024)
    else:
        vals_tr = np.reshape(vals_tr, (n_tr, n_timesteps, n_nodes, 1024))  # (None, timesteps, nodes, feat_size), (1357, 64, 32, 1024)
        vals_te = np.reshape(vals_te, (n_te, n_timesteps, n_nodes, 1024))  # (None, timesteps, nodes, feat_size), (355, 64, 32, 1024)

    print('finally')
    print(x_tr.shape)
    print(x_te.shape)
    print(vals_tr.shape)
    print(vals_te.shape)

    utils.h5_dump_multi((vals_tr, vals_te), ['x_tr', 'x_te'], edge_values_path)

    vals_tr = np.mean(vals_tr, axis=3)
    vals_te = np.mean(vals_te, axis=3)
    utils.pkl_dump((vals_tr, vals_te), edge_pooled_values_path)