def __init__(self, n_timesteps, n_timesteps_total, featurenet_type, x_heavy_path, is_random_tr=True, is_random_te=False, is_shuffle_tr=True, is_shuffle_te=False):
    """
    :param n_timesteps: How many timesteps per video.
    :param n_timesteps_total: Total number of timesteps available per video.
    :param featurenet_type: Which feature network was used to extract the features.
    :param x_heavy_path: Path to the heavy features file.
    :param is_random_tr: Sample random (True) or uniform (False) frames for training.
    :param is_random_te: Sample random (True) or uniform (False) frames for testing.
    :param is_shuffle_tr: Whether to shuffle the training data.
    :param is_shuffle_te: Whether to shuffle the test data.
    """
    self.__is_random_tr = is_random_tr
    self.__is_random_te = is_random_te
    self.__is_shuffle_tr = is_shuffle_tr
    self.__is_shuffle_te = is_shuffle_te
    self.__n_timesteps = n_timesteps
    self.__n_timesteps_total = n_timesteps_total

    n_frames_per_segment = utils.get_model_n_frames_per_segment(featurenet_type)
    n_frames = n_timesteps_total * n_frames_per_segment

    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    frames_annot_path = Pth('Breakfast/annotation/annot_frames_%s_%d.pkl', (featurenet_type, n_frames,))

    (self.__video_ids_tr, self.__y_tr, self.__video_ids_te, self.__y_te) = utils.pkl_load(gt_activities_path)

    (x_heavy_tr, x_heavy_te) = utils.h5_load_multi(x_heavy_path, ['x_tr', 'x_te'])  # (B, C, T, H, W)
    self.__x_heavy_tr = x_heavy_tr
    self.__x_heavy_te = x_heavy_te

    # select middle frame from each snippet
    (frames_dict_tr, frames_dict_te) = utils.pkl_load(frames_annot_path)
    frames_dict_tr = self.__select_middle_frame(frames_dict_tr, n_frames_per_segment)
    frames_dict_te = self.__select_middle_frame(frames_dict_te, n_frames_per_segment)
    self.__frames_dict_tr = frames_dict_tr
    self.__frames_dict_te = frames_dict_te
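# The helper __select_middle_frame used above is not shown in this snippet. A minimal
# sketch of what it could look like, assuming each entry of frames_dict is an ordered
# list of frame names grouped into consecutive segments of n_frames_per_segment
# (this structure is an assumption, not the repo's verbatim code):
def __select_middle_frame(self, frames_dict, n_frames_per_segment):
    idx_middle = n_frames_per_segment // 2
    selected = dict()
    for video_id, frame_names in frames_dict.items():
        # keep only the middle frame of every segment
        selected[video_id] = frame_names[idx_middle::n_frames_per_segment]
    return selected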
def __init__(self, features_path, n_timesteps, n_timesteps_total, is_random_tr=True, is_random_te=False, dataset_type=None):
    if dataset_type == const.DATASET_TYPES.breakfast:
        gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
        (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
    elif dataset_type == const.DATASET_TYPES.charades:
        gt_activities_path = Pth('Charades/annotation/video_annotation.pkl')
        (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
        self.__y_tr = self.__y_tr.astype(np.float32)
        self.__y_te = self.__y_te.astype(np.float32)
    else:
        raise Exception('Unknown Dataset Type: %s' % (dataset_type))

    (self.__x_tr, self.__x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    self.__feature_root_path = features_path
    self.__n_timesteps_total = n_timesteps_total
    self.__n_timesteps = n_timesteps
    self.__is_random_tr = is_random_tr
    self.__is_random_te = is_random_te
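# The is_random_tr / is_random_te flags above imply that the n_timesteps used per batch
# are sampled from n_timesteps_total somewhere else in this class. A minimal sketch of
# the two schemes (the helper name is hypothetical; np is numpy, as elsewhere in this module):
def __sample_timestep_idxes(self, is_random):
    if is_random:
        # random: pick distinct timesteps, kept in temporal order
        idxes = np.sort(np.random.choice(self.__n_timesteps_total, self.__n_timesteps, replace=False))
    else:
        # uniform: evenly spaced timesteps, as in the eager variant below
        step = self.__n_timesteps_total / float(self.__n_timesteps)
        idxes = np.arange(0, self.__n_timesteps_total, step, dtype=np.float32).astype(np.int32)
    return idxes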
def __init__(self, features_path, n_timesteps, n_timesteps_total, dataset_type=None):
    if dataset_type == const.DATASET_TYPES.breakfast:
        gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
        (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
    elif dataset_type == const.DATASET_TYPES.charades:
        gt_activities_path = Pth('Charades/annotation/video_annotation.pkl')
        (_, self.__y_tr, _, self.__y_te) = utils.pkl_load(gt_activities_path)
        self.__y_tr = self.__y_tr.astype(np.float32)
        self.__y_te = self.__y_te.astype(np.float32)
    else:
        raise Exception('Unknown Dataset Type: %s' % (dataset_type))

    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    # uniformly sub-sample n_timesteps out of n_timesteps_total along the time axis
    step = n_timesteps_total / float(n_timesteps)
    idxes = np.arange(0, n_timesteps_total, step, dtype=np.float32).astype(np.int32)
    x_tr = x_tr[:, :, idxes]
    x_te = x_te[:, :, idxes]

    self.__x_tr = x_tr.astype(np.float32)
    self.__x_te = x_te.astype(np.float32)
def _04_get_activation_values():
    # load data
    n_timesteps = 64
    n_centroids = 128
    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    attention_values_path = Pth('Breakfast/qualitative_results/node_attention_%s.pkl', (model_name,))

    v_input_n = utils.pkl_load(centroids_path)
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    epoch_num = 133
    model = __load_model(model_name, epoch_num)

    t_input_n = model.get_layer('input_n').input
    t_input_x = model.get_layer('input_x').input
    t_node_attention = model.get_layer('node_attention').output  # (None, 7, 7, 64, 100)
    keras_session = K.get_session()

    batch_size = 40
    att_tr = __get_tensor_values(batch_size, keras_session, t_node_attention, t_input_n, t_input_x, v_input_n, x_tr)  # (None, 1, 1, 64, 128)
    att_te = __get_tensor_values(batch_size, keras_session, t_node_attention, t_input_n, t_input_x, v_input_n, x_te)  # (None, 1, 1, 64, 128)

    # squeeze out the two singleton spatial dims
    att_tr = np.squeeze(att_tr, axis=1)  # (None, 1, 64, 128)
    att_tr = np.squeeze(att_tr, axis=1)  # (None, 64, 128)
    att_te = np.squeeze(att_te, axis=1)  # (None, 1, 64, 128)
    att_te = np.squeeze(att_te, axis=1)  # (None, 64, 128)

    print('finally')
    print(x_tr.shape)
    print(x_te.shape)
    print(att_tr.shape)
    print(att_te.shape)

    utils.pkl_dump((att_tr, att_te), attention_values_path)
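# __get_tensor_values is referenced above but not defined in this snippet. A minimal
# sketch of batched evaluation with a TF1/Keras session, assuming the tensor depends on
# a fixed nodes input and a per-batch video input (the signature mirrors the call above):
def __get_tensor_values(batch_size, keras_session, t_tensor, t_input_n, t_input_x, v_input_n, x):
    n_items = len(x)
    n_batches = int(np.ceil(n_items / float(batch_size)))
    values = []
    for idx_batch in range(n_batches):
        idx_start = idx_batch * batch_size
        x_batch = x[idx_start: idx_start + batch_size]
        # the nodes input (centroids) is the same for every batch
        v_batch = keras_session.run(t_tensor, feed_dict={t_input_n: v_input_n, t_input_x: x_batch})
        values.append(v_batch)
    return np.concatenate(values, axis=0)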
def train_model_on_pickled_features():
    """
    Train model.
    """
    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    is_spatial_pooling = False
    is_resume_training = False

    n_timesteps = 64
    batch_size_tr = 16
    batch_size_te = 40
    n_centroids = 128
    n_epochs = 100
    n_classes = N_CLASSES
    n_gpus = 1

    model_name = 'classifier_%s' % (utils.timestamp())
    model_weight_path = ''
    model_root_path = Pth('Breakfast/models/')
    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames_max_pool.h5', (n_timesteps * 8,)) if is_spatial_pooling else Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))

    centroids = utils.pkl_load(centroids_path)
    (video_ids_tr, y_tr), (video_ids_te, y_te) = utils.pkl_load(gt_activities_path)

    n_feat_maps, feat_map_side_dim = __get_model_feat_maps_info(model_type, feature_type)
    feat_map_side_dim = 1 if is_spatial_pooling else feat_map_side_dim
    input_shape = (None, n_timesteps, feat_map_side_dim, feat_map_side_dim, n_feat_maps)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    # root_model, model = __load_model_mlp_classifier_action_vlad(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    # root_model, model = __load_model_mlp_classifier_timeception(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    root_model, model = __load_model_mlp_classifier_video_graph(centroids, n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    t2 = time.time()
    duration = t2 - t1
    print(root_model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    print('... centroids: %s' % (centroids_path))
    t1 = time.time()
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = __calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = __calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' % (n_tr, n_batch_tr, batch_size_tr, n_gpus))
    print('... [te]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' % (n_te, n_batch_te, batch_size_te, n_gpus))

    save_callback = keras_utils.ModelSaveCallback(model, model_name, model_root_path)
    model.fit(x_tr, y_tr, epochs=n_epochs, batch_size=batch_size_tr, validation_split=0.0, validation_data=(x_te, y_te), shuffle=True, callbacks=[save_callback], verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
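# __calc_num_batches is referenced above but not defined in this snippet. A minimal
# sketch of the usual ceil-division batch count (assumed, not the repo's verbatim code):
def __calc_num_batches(n_samples, batch_size):
    # number of batches needed to cover all samples; the last batch may be partial
    return int(np.ceil(n_samples / float(batch_size)))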
def train_model_on_pickled_features():
    """
    Train model.
    """
    annotation_type = 'noun'
    annot_path = Pth('EPIC-Kitchens/annotation/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')
    features_path = Pth('EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5', (n_frames_per_video,))
    nodes_path = Pth('EPIC-Kitchens/features_centroids/features_random_%d.pkl', (n_nodes,))

    n_channels, side_dim = utils.get_model_feat_maps_info(model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)
    nodes = utils.pkl_load(nodes_path)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features were extracted using datasets.Epic_Kitchens.i3d_keras_epic_kitchens()
    # we use the out-of-the-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with the last conv feature 7*7*1024 'mixed_5c'
    # to get better performance, you need to randomly sample new frames and extract their features every epoch
    # please use this function for random sampling, instead of uniform sampling: Epic_Kitchens.__random_sample_frames_per_video_for_i3d()
    # then extract their features, as done in: Epic_Kitchens._901_extract_features_i3d()
    # then train on the extracted features, and repeat this every epoch; see the sketch after this function
    # it is computationally heavy, but you cannot avoid random sampling if you want better results
    # results are even better if you replace I3D with a 2D/3D CNN previously fine-tuned on Epic-Kitchens
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' % (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' % (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name, epoch_offset, model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te, y_te, batch_size_te, n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr, y_tr, epochs=n_epochs, batch_size=batch_size_tr, validation_split=0.0, validation_data=(x_te, y_te), shuffle=True, callbacks=model_callbacks, verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
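# The comment block above prescribes re-sampling frames every epoch instead of training
# on one fixed set of features. A minimal sketch of that loop; the two helpers are
# hypothetical stand-ins for Epic_Kitchens.__random_sample_frames_per_video_for_i3d()
# and Epic_Kitchens._901_extract_features_i3d() named in the comments:
for idx_epoch in range(n_epochs):
    # 1. randomly sample a new set of frames for every training video
    frame_pathes_tr = random_sample_frames_per_video(video_ids_tr, n_frames_per_video)
    # 2. extract i3d 'mixed_5c' features for the newly sampled frames
    x_tr = extract_features_i3d(frame_pathes_tr)
    # 3. train for a single epoch on the freshly extracted features
    model.fit(x_tr, y_tr, epochs=1, batch_size=batch_size_tr, shuffle=True, verbose=2)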
def train_model_videograph():
    """
    Train model.
    """
    annotation_type = 'noun'
    annot_path = Pth('EPIC-Kitchens/annotations/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')
    nodes_path = Pth('EPIC-Kitchens/features/nodes_random_%d.pkl', (n_nodes,))
    features_path = Pth('EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5', (n_frames_per_video,))

    n_channels, side_dim = utils.get_model_feat_maps_info(model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)

    # either load nodes, or generate them on the fly; but remember to save them, as you need them at test time
    # nodes = utils.pkl_load(nodes_path)
    nodes = utils.generate_centroids(n_nodes, n_channels)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features were extracted using datasets.epic_kitchens.i3d_keras_epic_kitchens()
    # we use the out-of-the-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with the last conv feature 7*7*1024 'mixed_5c'
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' % (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' % (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name, epoch_offset, model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te, y_te, batch_size_te, n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr, y_tr, epochs=n_epochs, batch_size=batch_size_tr, validation_split=0.0, validation_data=(x_te, y_te), shuffle=True, callbacks=model_callbacks, verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
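# utils.generate_centroids is used above to create the graph nodes on the fly. A minimal
# sketch, assuming the nodes are simply random vectors in the feature space (the actual
# initialization in the repo may differ):
def generate_centroids(n_centroids, n_dims):
    centroids = np.random.rand(n_centroids, n_dims).astype(np.float32)  # (n_centroids, n_dims)
    return centroids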
def _07_visualize_graph_edges():
    # load data
    n_timesteps = 64
    is_max_layer = True
    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    frames_annot_path = Pth('Breakfast/annotation/annot_frames_i3d_%d.pkl', (512,))
    class_names_path = Pth('Breakfast/annotation/activities_list.pkl')

    if is_max_layer:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_%s.h5', (model_name,))
        n_timesteps = 21
        n_nodes = 10
    else:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_%s.h5', (model_name,))
        n_timesteps = 64
        n_nodes = 32

    n_classes = ds_breakfast.N_CLASSES_ACTIVITIES
    frames_annot = utils.pkl_load(frames_annot_path)
    class_names = utils.pkl_load(class_names_path)
    (video_ids_tr, y_tr), (video_ids_te, y_te) = utils.pkl_load(gt_activities_path)
    y_tr = utils.debinarize_label(y_tr)
    y_te = utils.debinarize_label(y_te)

    if is_max_layer:
        # (1357, 10, 21, 1024) and (355, 10, 21, 1024)
        (x_tr, x_te) = utils.h5_load_multi(edge_values_path, ['x_tr', 'x_te'])
        x_tr = np.transpose(x_tr, (0, 2, 1, 3))  # (1357, 21, 10, 1024)
        x_te = np.transpose(x_te, (0, 2, 1, 3))  # (355, 21, 10, 1024)
    else:
        # (1357, 64, 32, 1024) and (355, 64, 32, 1024)
        (x_tr, x_te) = utils.pkl_load(edge_values_path)

    x_original = x_tr
    y = y_tr
    assert n_timesteps == x_original.shape[1]
    assert n_nodes == x_original.shape[2]

    # pool over time
    x = np.mean(x_original, axis=1)  # (None, N, C)

    padding = 3
    node_ids = np.arange(n_nodes)
    x_sum_mean = np.mean(np.sum(x, axis=2), axis=0)
    min_node_value = min(x_sum_mean)
    max_node_value = max(x_sum_mean)

    def _scale_val(val):
        # invert the distance so that closer nodes get stronger edges
        val = 1 / val
        val = pow(val, 1.2)
        return val

    # loop on classes of the dataset
    for idx_class in range(n_classes):
        class_num = idx_class + 1
        class_name = class_names[idx_class]
        idx_samples = np.where(y == idx_class)[0]
        x_class = x[idx_samples]  # (None, N, C)

        # pool over samples
        x_class = np.mean(x_class, axis=0)  # (N, C)

        graph = nx.Graph()
        node_values = np.sum(x_class, axis=1)

        # add the items as nodes to the graph
        for node_id in node_ids:
            if not graph.has_node(node_id):
                graph.add_node(node_id)

        # first pass: find min/max edge values, for later normalization
        max_edge_val = 0.0
        min_edge_val = 10000
        for idx_node in range(n_nodes):
            for idx_col in range(idx_node - padding, idx_node + padding + 1):
                for idx_row in range(idx_node - padding, idx_node + padding + 1):
                    if idx_col < 0 or idx_col >= n_nodes:
                        continue
                    if idx_row < 0 or idx_row >= n_nodes:
                        continue
                    if idx_row == idx_col:
                        continue
                    val = distance.euclidean(x_class[idx_row], x_class[idx_col])
                    val = _scale_val(val)
                    min_edge_val = min(min_edge_val, val)
                    max_edge_val = max(max_edge_val, val)

        # second pass: add the edges with their values
        for idx_node in range(n_nodes):
            for idx_col in range(idx_node - padding, idx_node + padding + 1):
                for idx_row in range(idx_node - padding, idx_node + padding + 1):
                    if idx_col < 0 or idx_col >= n_nodes:
                        continue
                    if idx_row < 0 or idx_row >= n_nodes:
                        continue
                    if idx_row == idx_col:
                        continue
                    # this value represents edges between nodes in a local window of size 7
                    val = distance.euclidean(x_class[idx_row], x_class[idx_col])
                    val = _scale_val(val)
                    id_1 = idx_col
                    id_2 = idx_row
                    # add edge if it does not exist; else, average its old value with the current one
                    if not graph.has_edge(id_1, id_2):
                        graph.add_edge(id_1, id_2, vals=[val], val=val)
                    else:
                        vals = [val] + graph.get_edge_data(id_1, id_2)['vals']
                        val = np.average(vals)
                        graph[id_1][id_2]['vals'] = vals
                        graph[id_1][id_2]['val'] = val

        # now plot this graph
        g_edges = graph.edges
        g_nodes = graph.nodes
        # embed the graph
        # g_embedding = __async_tsne_embedding(x_class)
        # g_embedding = nx.random_layout(graph)
        # g_embedding = nx.spectral_layout(graph, weight='val')  # spectral embedding with laplacian matrix
        # g_embedding = nx.kamada_kawai_layout(graph, weight='val', scale=10, dim=2)  # optimal distance between nodes
        g_embedding = nx.spring_layout(graph, weight='val', iterations=1000, scale=10, dim=2, seed=101)

        # plot graph
        __plot_embedded_graph(graph, g_embedding, g_edges, node_values, class_num, class_name, min_node_value, max_node_value, min_edge_val, max_edge_val, n_nodes)
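# __plot_embedded_graph is referenced above but not shown in this snippet. A minimal
# sketch using networkx's own drawing helpers, assuming node sizes and edge widths
# should reflect the computed node/edge values (the signature mirrors the call above;
# the drawing details are assumptions, not the repo's verbatim code):
import matplotlib.pyplot as plt

def __plot_embedded_graph(graph, g_embedding, g_edges, node_values, class_num, class_name, min_node_value, max_node_value, min_edge_val, max_edge_val, n_nodes):
    # normalize node and edge values into usable plotting ranges
    node_sizes = 100 + 900 * (node_values - min_node_value) / (max_node_value - min_node_value)
    edge_widths = [1 + 4 * (graph[u][v]['val'] - min_edge_val) / (max_edge_val - min_edge_val) for (u, v) in g_edges]
    nx.draw_networkx(graph, pos=g_embedding, node_size=node_sizes, width=edge_widths, with_labels=True)
    plt.title('%02d: %s' % (class_num, class_name))
    plt.savefig('graph_edges_class_%02d.png' % (class_num))
    plt.close()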
def _06_get_graph_edges():
    # load data
    n_timesteps = 64
    n_centroids = 128
    is_max_layer = True
    model_name = 'classifier_19.02.21-01:00:30'
    features_path = Pth('Breakfast/features/features_i3d_mixed_5c_%d_frames.h5', (n_timesteps * 8,))
    centroids_path = Pth('Breakfast/features_centroids/features_random_%d_centroids.pkl', (n_centroids,))

    if is_max_layer:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_%s.h5', (model_name,))
        edge_pooled_values_path = Pth('Breakfast/qualitative_results/graph_edges_max_reduced_%s.pkl', (model_name,))
        layer_name = 'pool_t_1'
        n_timesteps = 21
        n_nodes = 10
    else:
        edge_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_%s.h5', (model_name,))
        edge_pooled_values_path = Pth('Breakfast/qualitative_results/graph_edges_relu_reduced_%s.pkl', (model_name,))
        layer_name = 'leaky_re_lu_3'
        n_timesteps = 64
        n_nodes = 32

    v_input_n = utils.pkl_load(centroids_path)
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])

    epoch_num = 133
    batch_size = 40
    model = __load_model(model_name, epoch_num)
    t_input_n = model.get_layer('input_n').input
    t_input_x = model.get_layer('input_x').input
    t_activations = model.get_layer(layer_name).output  # (None * 64, 32, 1, 1, 1024)
    keras_session = K.get_session()

    # 1357 train, 355 test
    vals_tr = __get_tensor_values(batch_size, keras_session, t_activations, t_input_n, t_input_x, v_input_n, x_tr)  # (None * 64, 32, 1, 1, 1024)
    vals_te = __get_tensor_values(batch_size, keras_session, t_activations, t_input_n, t_input_x, v_input_n, x_te)  # (None * 64, 32, 1, 1, 1024)

    # squeeze out the two singleton spatial dims
    vals_tr = np.squeeze(vals_tr, axis=2)
    vals_tr = np.squeeze(vals_tr, axis=2)
    vals_te = np.squeeze(vals_te, axis=2)
    vals_te = np.squeeze(vals_te, axis=2)

    n_tr = 1357
    n_te = 355
    if is_max_layer:
        vals_tr = np.reshape(vals_tr, (n_tr, n_nodes, n_timesteps, 1024))  # (None, nodes, timesteps, feat_size), (1357, 10, 21, 1024)
        vals_te = np.reshape(vals_te, (n_te, n_nodes, n_timesteps, 1024))  # (None, nodes, timesteps, feat_size), (355, 10, 21, 1024)
    else:
        vals_tr = np.reshape(vals_tr, (n_tr, n_timesteps, n_nodes, 1024))  # (None, timesteps, nodes, feat_size), (1357, 64, 32, 1024)
        vals_te = np.reshape(vals_te, (n_te, n_timesteps, n_nodes, 1024))  # (None, timesteps, nodes, feat_size), (355, 64, 32, 1024)

    print('finally')
    print(x_tr.shape)
    print(x_te.shape)
    print(vals_tr.shape)
    print(vals_te.shape)

    utils.h5_dump_multi((vals_tr, vals_te), ['x_tr', 'x_te'], edge_values_path)

    vals_tr = np.mean(vals_tr, axis=3)
    vals_te = np.mean(vals_te, axis=3)
    utils.pkl_dump((vals_tr, vals_te), edge_pooled_values_path)
def analysis():
    ###### Annotation labels ######
    path_anno = 'Hico/features/h5/anno_hico.pkl'
    num_class = 600
    metric_fn = pytorch_utils.METRIC_FUNCTIONS.ap_hico
    annot_path = Pth(path_anno)

    print('... loading data')
    (img_names_tr, y_tr, _, img_names_te, y_te, y_te_mask) = utils.pkl_load(annot_path)
    y_tr = y_tr.astype(np.float32)
    y_te = y_te.astype(np.float32)
    metric_fn = pytorch_utils.METRIC_FUNCTIONS.ap_hico_all

    ###### Load interaction categories ######
    print('...Loading categories...')
    classes = sio.loadmat('../../where-is-interaction/main/data/anno.mat')
    nouns = classes['objects']
    verbs = classes['verbs']
    cats = classes['super_category']

    verblist = []
    for v in verbs:
        verblist.append(np.squeeze(v[0][0]))
    objlist = []
    for o in nouns:
        objlist.append(np.squeeze(o[0][0]))

    verblist = np.squeeze(np.array(verblist))
    objlist = np.squeeze(np.array(objlist))

    ###### Load alpha values for analysis ######
    print('...Loading alpha values...')
    alpha_path = '/var/scratch/mkilicka/code/context-driven-interactions/submission/data/hico/results/gumbel_softmax_hard_gating.h5'
    (y_te_pred, alphas) = utils.h5_load_multi(alpha_path, ['y_pred_te', 'alphas'])  # (B, 600), (B, M, N)
    C = y_te_pred.shape[1]
    alphas = alphas.max(2)  # (B, M)

    ###### Generate alpha values per class statistics ######
    print('...Computing class-level alphas...')

    # per interaction
    output = np.zeros((C, 4), dtype=np.float32)
    y_te_ = np.transpose(y_te)
    for i in range(C):
        index = np.where(y_te_[i] == 1)[0]
        alphas_per_class = alphas[index].mean(0)
        output[i] = alphas_per_class

    # per object
    unique_objects = np.unique(objlist)
    output_object = np.zeros((len(unique_objects), 4), dtype=np.float32)
    for i in range(len(unique_objects)):
        inter_classes = np.where(objlist == unique_objects[i])[0]
        for j in inter_classes:
            index = np.where(y_te_[j] == 1)[0]
            alphas_per_class = alphas[index].mean(0)
            output_object[i] += alphas_per_class
        output_object[i] = output_object[i] / len(inter_classes)

    # per verb
    unique_verbs = np.unique(verblist)
    output_verb = np.zeros((len(unique_verbs), 4), dtype=np.float32)
    for i in range(len(unique_verbs)):
        inter_classes = np.where(verblist == unique_verbs[i])[0]
        for j in inter_classes:
            index = np.where(y_te_[j] == 1)[0]
            alphas_per_class = alphas[index].mean(0)
            output_verb[i] += alphas_per_class
        output_verb[i] = output_verb[i] / len(inter_classes)

    ###### Export alpha values to csv file for interactions ######
    print('...Exporting alphas...')
    import csv
    classfile = open('./analysis/per_class_alpha_analysis.csv', 'w')
    classfile.write('verb\tobject\tlvis\tlocal_scene\tdeformation\tpart\n')
    for i in range(C):
        text = verblist[i] + '\t' + objlist[i] + '\t' + str(output[i, 0]) + '\t' + str(output[i, 1]) + '\t' + str(output[i, 2]) + '\t' + str(output[i, 3]) + '\n'
        classfile.write(text)
    classfile.close()

    ###### Export alpha values to csv file for objects ######
    print('...Exporting alphas...')
    classfile = open('./analysis/per_class_alpha_analysis_object.csv', 'w')
    classfile.write('object\tlvis\tlocal_scene\tdeformation\tpart\n')
    for i in range(len(unique_objects)):
        text = unique_objects[i] + '\t' + str(output_object[i, 0]) + '\t' + str(output_object[i, 1]) + '\t' + str(output_object[i, 2]) + '\t' + str(output_object[i, 3]) + '\n'
        classfile.write(text)
    classfile.close()

    ###### Export alpha values to csv file for verbs ######
    print('...Exporting alphas...')
    classfile = open('./analysis/per_class_alpha_analysis_verb.csv', 'w')
    classfile.write('verb\tlvis\tlocal_scene\tdeformation\tpart\n')
    for i in range(len(unique_verbs)):
        text = unique_verbs[i] + '\t' + str(output_verb[i, 0]) + '\t' + str(output_verb[i, 1]) + '\t' + str(output_verb[i, 2]) + '\t' + str(output_verb[i, 3]) + '\n'
        classfile.write(text)
    classfile.close()

    ###### Export selected objects to create a heatmap ######
    query_objects = ['dining_table', 'oven', 'refrigerator', 'motorcycle', 'horse', 'car', 'snowboard', 'skis', 'skateboard', 'bowl', 'orange', 'donut']
    contexts = ['objects', 'local scene', 'deformation', 'part appearance']
    query_objects = np.array(query_objects)
    heatmap = np.zeros((query_objects.shape[0], 4), dtype=np.float32)

    for i in range(query_objects.shape[0]):
        index = np.where(unique_objects == query_objects[i])
        temp = output_object[index]
        heatmap[i] = temp

    print(heatmap)
    sio.savemat('./analysis/exp3_heatmap_object.mat', {'heatmap': heatmap, 'objects': query_objects, 'contexts': contexts})

    ###### Export selected verbs to create a heatmap ######
    query_verbs = ['eat_at', 'clean', 'cook', 'race', 'row', 'drive', 'throw', 'stand_on', 'jump', 'cut_with', 'brush_with', 'eat']
    contexts = ['objects', 'local scene', 'deformation', 'part appearance']
    query_verbs = np.array(query_verbs)
    heatmap = np.zeros((query_verbs.shape[0], 4), dtype=np.float32)

    for i in range(query_verbs.shape[0]):
        index = np.where(unique_verbs == query_verbs[i])
        temp = output_verb[index]
        heatmap[i] = temp

    print(heatmap)
    sio.savemat('./analysis/exp3_heatmap_verb.mat', {'heatmap': heatmap, 'objects': query_verbs, 'contexts': contexts})
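# The three exports above build tab-separated lines by hand, even though csv is imported.
# An equivalent, less error-prone variant using csv.writer, shown for the per-interaction
# table only (same columns and delimiter as above; the function name is hypothetical):
import csv

def export_per_class_alphas(path, verblist, objlist, output):
    with open(path, 'w') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerow(['verb', 'object', 'lvis', 'local_scene', 'deformation', 'part'])
        for i in range(len(output)):
            writer.writerow([verblist[i], objlist[i]] + [output[i, j] for j in range(4)])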
def train_human_object_multiple_context_gating(soft_flag=True, backbone='rcnn'):
    n_epochs = 100
    batch_size_tr = 32
    batch_size_te = 32
    n_classes = N_CLASSES

    if backbone == 'rcnn':
        print('Using backbone rcnn')
        feature_path_interaction = Pth('Hico/features/h5/features_base_subject_object.h5')
        n_channels, n_regions, channel_side_dim = 4096, 12, 1
        (x_tr, x_te) = utils.h5_load_multi(feature_path_interaction, ['x_tr', 'x_te'])
        x_tr = np.swapaxes(x_tr, 1, 2)
        x_te = np.swapaxes(x_te, 1, 2)
    elif backbone == 'pairatt':
        print('Using backbone pairatt')
        feature_path_interaction = Pth('Hico/features/h5/features_pairattn.h5')
        n_channels, n_regions, channel_side_dim = 4096, 3, 1
        (x_tr, x_te) = utils.h5_load_multi(feature_path_interaction, ['x_tr', 'x_te'])

    # context feature paths; note that x_cs_shape is overwritten several times below,
    # so only the last assignment (the four contexts actually used) takes effect
    feature_path_c3 = Pth('Hico/features/h5/deformation.h5')  # f_deformation
    x_cs_shape = [(512, 1, 1, 1)]
    feature_path_c1 = Pth('Hico/features/h5/lvis.h5')  # f_lvis
    x_cs_shape = [(1300, 1, 1, 1)]
    feature_path_c2 = Pth('Hico/features/h5/local_scene.h5')  # f_local_scene
    x_cs_shape = [(2048, 1, 1, 1)]
    feature_path_context = Pth('Hico/features/h5/stuff.h5')  # f_stuff
    x_cs_shape = [(649, 1, 1, 1)]
    feature_path_context = Pth('Hico/features/h5/part_states.h5')  # f_part_states
    x_cs_shape = [(1032, 1, 1, 1)]
    feature_path_c4 = Pth('Hico/features/h5/local_pose.h5')  # f_local_pose
    x_cs_shape = [(4096, 1, 1, 1)]
    x_cs_shape = [(1300, 1, 1, 1), (2048, 1, 1, 1), (512, 1, 1, 1), (4096, 1, 1, 1)]

    # Annotation of the image
    annot_path = Pth('Hico/features/h5/anno_hico.pkl')
    model_name = 'classifier_%s' % (utils.timestamp())
    input_shape = (n_channels, n_regions, channel_side_dim, channel_side_dim)

    print('--- start time')
    print(datetime.datetime.now())

    print('... loading data')
    t1 = time.time()
    (img_names_tr, y_tr, y_tr_mask, img_names_te, y_te, y_te_mask) = utils.pkl_load(annot_path)
    y_tr = y_tr.astype(np.float32)
    y_te = y_te.astype(np.float32)
    y_tr_mask = y_tr_mask.astype(np.float32)
    y_te_mask = y_te_mask.astype(np.float32)

    print('... context features')
    (x_tr_c1, x_te_c1) = utils.h5_load_multi(feature_path_c1, ['x_tr', 'x_te'])
    # x_tr_c1 = expand_feats(x_tr_c1)
    # x_te_c1 = expand_feats(x_te_c1)

    (x_tr_c2, x_te_c2) = utils.h5_load_multi(feature_path_c2, ['x_tr', 'x_te'])
    x_tr_c2 = expand_feats(x_tr_c2)
    x_te_c2 = expand_feats(x_te_c2)

    (x_tr_c3, x_te_c3) = utils.h5_load_multi(feature_path_c3, ['x_tr', 'x_te'])
    x_tr_c3 = expand_feats(x_tr_c3)
    x_te_c3 = expand_feats(x_te_c3)

    (x_tr_c4, x_te_c4) = utils.h5_load_multi(feature_path_c4, ['x_tr', 'x_te'])
    x_tr_c4 = expand_feats(x_tr_c4)
    x_te_c4 = expand_feats(x_te_c4)

    print('train_set_shape_interaction: ', x_tr.shape)
    print('test_set_shape_interaction: ', x_te.shape)
    print('train_set_shape_context-1: ', x_tr_c1.shape)
    print('test_set_shape_context-1: ', x_te_c1.shape)
    print('train_set_shape_context-2: ', x_tr_c2.shape)
    print('test_set_shape_context-2: ', x_te_c2.shape)
    print('train_set_shape_context-3: ', x_tr_c3.shape)
    print('test_set_shape_context-3: ', x_te_c3.shape)
    print('train_set_shape_context-4: ', x_tr_c4.shape)
    print('test_set_shape_context-4: ', x_te_c4.shape)

    t2 = time.time()
    duration = t2 - t1
    print('... loading data, duration (sec): %d' % (duration))

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()

    # NOTE: only the soft-fusion branch is shown in this snippet; soft_flag=False would leave model undefined
    if soft_flag:
        print('Training soft fusion model')
        model = ClassifierContextLateFusionMultiSoftGate(n_classes, input_shape, x_cs_shape)

    t2 = time.time()
    duration = t2 - t1
    model = model.cuda()
    input_sizes = [input_shape] + list(x_cs_shape)
    # pytorch_utils.model_summary_multi_input(model, input_sizes=input_sizes, batch_size=-1, device='cuda')
    print('... model built, duration (sec): %d' % (duration))

    # callbacks
    callbacks = []

    print('Interaction_feat: %s, Context_feat-1: %s, Context_feat-2: %s, Context_feat-3: %s\n' % (feature_path_interaction, feature_path_c1, feature_path_c2, feature_path_c3))

    # start training
    pytorch_utils.train_model_custom_metric_mask(model, model._optimizer, model._loss_fn, model._metric_fn, [x_tr, x_tr_c1, x_tr_c2, x_tr_c3, x_tr_c4], y_tr, y_tr_mask, [x_te, x_te_c1, x_te_c2, x_te_c3, x_te_c4], y_te, y_te_mask, n_epochs, batch_size_tr, batch_size_te, callbacks=callbacks)

    print('--- finish time')
    print(datetime.datetime.now())
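# expand_feats is used above to bring flat context features to the shapes listed in
# x_cs_shape, e.g. (2048,) per sample -> (2048, 1, 1, 1). A minimal sketch, assuming it
# just appends singleton axes to a (B, C) array (the repo's helper may differ):
def expand_feats(x):
    return x.reshape(x.shape + (1, 1, 1))  # (B, C) -> (B, C, 1, 1, 1)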
print('Result of multi-head gating exp: %02.02f' % (acc_te))
###### multi-head gating inference loop (for alphas) ######
'''

###### multi-head gating inference loop (for alphas) ######
backbone = 'rcnn'
ablation = False

if backbone == 'rcnn':
    feature_path_interaction = Pth('Hico/features/h5/features_base_subject_object.h5')
    n_channels, n_regions, channel_side_dim = 4096, 12, 1
    (x_tr, x_te) = utils.h5_load_multi(feature_path_interaction, ['x_tr', 'x_te'])
    x_te = np.swapaxes(x_te, 1, 2)

if not ablation:
    path_model = '/var/scratch/mkilicka/data/hico/models_finetuned/late_hard_gating_for_hico/model.pt'
    path_save = '/var/scratch/mkilicka/code/context-driven-interactions/submission/data/hico/results/gumbel_softmax_hard_gating.h5'
else:
    path_model = '/var/scratch/mkilicka/data/hico/models_finetuned/late_hard_ablated_gating_for_hico/model.pt'
    path_save = '/var/scratch/mkilicka/code/context-driven-interactions/submission/data/hico/results/gumbel_softmax_hard_ablated_gating.h5'

print('backbone:', backbone)

input_shape = (n_channels, n_regions, channel_side_dim, channel_side_dim)

feature_path_c1 = Pth('Hico/features/h5/lvis.h5')
feature_path_c2 = Pth('Hico/features/h5/local_scene.h5')
feature_path_c3 = Pth('Hico/features/h5/deformation.h5')