def __init__(self, batch_size, n_classes, feature_dim, feature_name, is_training, is_shuffle=True):
    """
    Set up the Charades feature generator.

    Stores the configuration on the instance, loads the video-level
    annotation pickle, converts the labels to float32 and builds the path
    of the feature file of every video.

    :param batch_size: number of samples per batch.
    :param n_classes: number of classes (stored only).
    :param feature_dim: dimensions of a feature (stored only).
    :param feature_name: name of the features sub-directory under the dataset root.
    :param is_training: if True use the first (names, labels) pair of the
        annotation, otherwise use the second pair.
    :param is_shuffle: accepted but neither stored nor used by this initializer.
    """

    # NOTE: is_shuffle is intentionally left unused here; the original
    # assignment to self.is_shuffle was commented out.
    self.dataset_name = 'charades'
    self.batch_size = batch_size
    self.n_classes = n_classes
    self.feature_dim = feature_dim
    self.feature_name = feature_name
    self.is_training = is_training

    # the annotation pickle holds four items; which pair is used depends on the split
    root_path = './data/charades'
    annotation_path = '%s/annotation/video_annotation_py3.pkl' % (root_path)
    annotation = utils.pkl_load(annotation_path)
    if self.is_training:
        (video_names, y, _, _) = annotation
        print(video_names)
    else:
        (_, _, video_names, y) = annotation

    # single-label classification works on debinarized labels
    is_single_label = config.cfg.MODEL.CLASSIFICATION_TYPE == 'sl'
    if is_single_label:
        y = utils.debinarize_label(y)

    # whatever the classification type, the target is float
    y = y.astype(np.float32)

    # one feature file per video, expressed relative to the dataset root
    path_template = '%s/%s/%s.pkl'
    feats_path = []
    for video_name in video_names:
        feats_path.append(path_template % (root_path, feature_name, video_name))
    feats_path = np.array(feats_path)

    self.y = y
    self.feats_path = feats_path
    self.n_samples = len(y)
    self.n_batches = utils.calc_num_batches(self.n_samples, batch_size)
def __get_tensor_values(batch_size, keras_session, tensor, t_input_n, t_input_x, v_input_n, x):
    """
    Evaluate a tensor over the given input, batch by batch, and stack the results.

    :param batch_size: number of items of x fed per session run.
    :param keras_session: session object whose run(fetches, feed_dict) is called.
    :param tensor: the tensor to evaluate.
    :param t_input_n: placeholder fed with v_input_n on every run.
    :param t_input_x: placeholder fed with the current slice of x.
    :param v_input_n: value fed to t_input_n (same for all batches).
    :param x: input items; sliced along the first axis.
    :return: numpy array with the values of all batches stacked with
        np.vstack. With a single batch the values are returned unstacked;
        with no batches the result is np.array(None), as before.
    """

    n_items = len(x)
    n_batches = utils.calc_num_batches(n_items, batch_size)

    # collect the batches and stack once, instead of re-copying the
    # accumulated array on every iteration
    batch_values = []

    for idx_batch in range(n_batches):
        utils.print_counter(idx_batch + 1, n_batches)

        idx_b = idx_batch * batch_size
        idx_e = idx_b + batch_size
        v_input_x = x[idx_b:idx_e]
        print(v_input_x.shape)

        # original author's note on the shape of the values: (None, 1, 1, 64, 128)
        values, = keras_session.run([tensor], {t_input_x: v_input_x, t_input_n: v_input_n})
        print(values.shape)
        # print('') emits an empty line under both Python 2 and Python 3
        print('')

        batch_values.append(values)

    if not batch_values:
        # nothing was evaluated; keep the historical return value
        return np.array(None)

    # a single batch was never passed through vstack, so keep its shape untouched
    data = batch_values[0] if len(batch_values) == 1 else np.vstack(batch_values)
    data = np.array(data)
    return data
def __init__(self, batch_size, n_classes, feature_dim, feature_name, is_training, is_shuffle=True):
    """
    Set up the Charades feature generator.

    Stores the configuration on the instance, loads the video-level
    annotation pickle, converts the labels to float32, builds the path of
    the feature file of every video and optionally shuffles the data.

    The example values below come from the original author's inline notes.

    :param batch_size: number of samples per batch, e.g. 32.
    :param n_classes: number of classes, e.g. 157.
    :param feature_dim: dimensions of a feature, e.g. (1024, 32, 7, 7).
    :param feature_name: name of the features sub-directory under the dataset
        root, e.g. 'features_i3d_pytorch_charades_rgb_mixed_5c_32f'.
    :param is_training: if True use the first (names, labels) pair of the
        annotation, otherwise use the second pair.
    :param is_shuffle: if True, the private __shuffle method is called at the end.
    """

    self.dataset_name = 'Charades'
    self.batch_size = batch_size
    self.n_classes = n_classes
    self.feature_dim = feature_dim
    self.feature_name = feature_name
    self.is_training = is_training
    self.is_shuffle = is_shuffle

    # video annotation, i.e. ./data/Charades/annotation/video_annotation.pkl
    root_path = './data/Charades'
    annotation_path = '%s/annotation/video_annotation.pkl' % (root_path)
    annotation = utils.pkl_load(annotation_path)
    if self.is_training:
        # author's note: video_names look like [b'001YG' b'004QE' ...], y.shape = (7811, 157)
        (video_names, y, _, _) = annotation
    else:
        # author's note: y.shape = (1814, 157)
        (_, _, video_names, y) = annotation

    # single-label classification works on debinarized labels
    is_single_label = config.cfg.MODEL.CLASSIFICATION_TYPE == 'sl'
    if is_single_label:
        y = utils.debinarize_label(y)

    # whatever the classification type, the target is float
    y = y.astype(np.float32)

    # paths, relative to the dataset root, where the features extracted by
    # I3D are stored; each video name is converted to str first
    path_template = '%s/%s/%s.pkl'
    feats_path = []
    for video_name in video_names:
        feats_path.append(path_template % (root_path, feature_name, video_name.astype(str)))
    feats_path = np.array(feats_path)

    self.y = y
    self.feats_path = feats_path
    self.n_samples = len(y)
    # number of batches
    self.n_batches = utils.calc_num_batches(self.n_samples, batch_size)

    # shuffle the order of the data
    if self.is_shuffle:
        self.__shuffle()
def train_model_on_pickled_features():
    """
    Train the videograph classifier on pre-extracted EPIC-Kitchens features.

    Loads the 'noun' annotation, the previously pickled nodes and the
    features from an h5 file, builds the model, then fits it with a
    model-saving callback and a score callback evaluated on the test split.
    All hyper-parameters are hard-coded below. Returns nothing.
    """

    annotation_type = 'noun'
    annot_path = Pth('EPIC-Kitchens/annotation/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')

    features_path = Pth('EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5', (n_frames_per_video, ))
    nodes_path = Pth('EPIC-Kitchens/features_centroids/features_random_%d.pkl', (n_nodes, ))
    n_channels, side_dim = utils.get_model_feat_maps_info(model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)
    nodes = utils.pkl_load(nodes_path)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print(), which would emit a spurious "None"
    model.summary(line_length=130, positions=None, print_fn=None)
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features are extracted using datasets.Epic_Kitchens.i3d_keras_epic_kitchens()
    # we use out-of-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with last conv feature 7*7*1024 'mixed_5c'
    # to get a better performance, you need to write code to randomly sample new frames and extract their features every new epoch
    # please use this function for random sampling, instead of uniform sampling: Epic_Kitchens.__random_sample_frames_per_video_for_i3d()
    # then extract their features, as done in: Epic_Kitchens._901_extract_features_i3d()
    # then train on the extracted features. Please do so in every epoch. It's computationally heavy, but you cannot avoid random sampling to get better results.
    # Even better results if you replace I3D with a 2D/3D CNN that's previously fine-tuned on Epic-Kitchens
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' % (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' % (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name, epoch_offset, model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te, y_te, batch_size_te, n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr, y_tr, epochs=n_epochs, batch_size=batch_size_tr, validation_split=0.0, validation_data=(x_te, y_te), shuffle=True, callbacks=model_callbacks, verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
def train_model_videograph():
    """
    Train the videograph classifier on pre-extracted EPIC-Kitchens features.

    Loads the 'noun' annotation, generates the nodes on the fly, loads the
    features from an h5 file, builds the model, then fits it with a
    model-saving callback and a score callback evaluated on the test split.
    All hyper-parameters are hard-coded below. Returns nothing.

    NOTE: the generated nodes are not saved by this function, although they
    are needed again at test time.
    """

    annotation_type = 'noun'
    annot_path = Pth('EPIC-Kitchens/annotations/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')

    # nodes_path is only needed by the "load nodes" alternative described below
    nodes_path = Pth('EPIC-Kitchens/features/nodes_random_%d.pkl', (n_nodes, ))
    features_path = Pth('EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5', (n_frames_per_video, ))
    n_channels, side_dim = utils.get_model_feat_maps_info(model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)

    # either load nodes, or generate them on the fly, but remember to save them, as you need them in test time
    # (to load previously saved nodes instead, use utils.pkl_load(nodes_path))
    nodes = utils.generate_centroids(n_nodes, n_channels)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print(), which would emit a spurious "None"
    model.summary(line_length=130, positions=None, print_fn=None)
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features are extracted using datasets.epic_kitchens.i3d_keras_epic_kitchens()
    # we use out-of-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with last conv feature 7*7*1024 'mixed_5c'
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' % (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' % (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name, epoch_offset, model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te, y_te, batch_size_te, n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr, y_tr, epochs=n_epochs, batch_size=batch_size_tr, validation_split=0.0, validation_data=(x_te, y_te), shuffle=True, callbacks=model_callbacks, verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
def train_model_on_pickled_features():
    """
    Train the video-graph MLP classifier on pre-extracted Breakfast features.

    Loads the ground-truth unit actions, the pickled centroids and the
    features from an h5 file (the max-pooled variant when spatial pooling
    is enabled), builds the model, then fits it with a score callback
    evaluated on the test split. All hyper-parameters are hard-coded
    below. Returns nothing.
    """

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_centroids = 128
    n_timesteps = 64
    is_spatial_pooling = True
    is_resume_training = False

    batch_size_tr = 12
    batch_size_te = 30
    n_epochs = 100
    n_classes = N_CLASSES
    n_gpus = 1

    model_name = 'classifier_%s' % (utils.timestamp())
    model_weight_path = ''
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames_max_pool.h5', (n_timesteps * 8,)) if is_spatial_pooling else Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    gt_actions_path = Pth('Breakfast/annotation/gt_unit_actions.pkl')

    (_, y_tr), (_, y_te) = utils.pkl_load(gt_actions_path)
    centroids = utils.pkl_load(centroids_path)

    n_feat_maps, feat_map_side_dim = utils.get_model_feat_maps_info(model_type, feature_type)
    # with spatial pooling the side dimension of the feature map is set to 1
    feat_map_side_dim = 1 if is_spatial_pooling else feat_map_side_dim
    input_shape = (None, n_timesteps, feat_map_side_dim, feat_map_side_dim, n_feat_maps)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    root_model, model = __load_model_mlp_classifier_video_graph(centroids, n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    t2 = time.time()
    duration = t2 - t1
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print(), which would emit a spurious "None"
    root_model.summary(line_length=130, positions=None, print_fn=None)
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    print('... centroids: %s' % (centroids_path))
    t1 = time.time()
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    print(x_tr.shape)
    print(y_tr.shape)
    print(x_te.shape)
    print(y_te.shape)

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' % (n_tr, n_batch_tr, batch_size_tr, n_gpus))
    print('... [te]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' % (n_te, n_batch_te, batch_size_te, n_gpus))

    score_callback = ScoreCallback(model, None, None, x_te, y_te, batch_size_te)
    callbacks = [score_callback]
    model.fit(x_tr, y_tr, epochs=n_epochs, batch_size=batch_size_tr, validation_split=0.0, validation_data=(x_te, y_te), shuffle=True, callbacks=callbacks, verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())