def _read_features(self):
    try:
        feature_path_context = ops.join(
            opt.text_path, opt.contextualization, self.video_idx,
            '%s_%s.npy' % (self.video_idx, self.scene_idx))
        self.features = np.load(feature_path_context)
    except FileNotFoundError:
        print(feature_path_context)
        # no cached contextualized features yet: load the raw per-layer
        # features, pool them according to opt.contextualization, and cache
        feature_path = ops.join(
            opt.text_path, self.video_idx,
            '%s_%s.npy' % (self.video_idx, self.scene_idx))
        try:
            self.features = np.load(feature_path)
        except ValueError as err:
            print('%s\n%s' % (err, feature_path))
            raise
        self.features = self.features.reshape(
            (-1, opt.text_layers, opt.text_dim))
        if opt.contextualization == 'second-to-last':
            self.second_to_last()
        elif opt.contextualization == 'last':
            self.last()
        elif opt.contextualization == 'sum-all':
            self.sum_all()
        elif opt.contextualization == 'sum-last-4':
            self.sum_last_n()
        elif opt.contextualization == 'cat-last-4':
            self.cat_last_n()
        feature_dir_path = ops.join(opt.text_path, opt.contextualization)
        dir_check(feature_dir_path)
        feature_dir_path = ops.join(feature_dir_path, self.video_idx)
        dir_check(feature_dir_path)
        np.save(feature_path_context, self.features)
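
# The contextualization helpers called above are defined elsewhere; the
# sketch below shows what they plausibly compute, assuming self.features has
# shape (tokens, layers, dim) as produced by the reshape above and following
# the standard BERT layer-pooling strategies the option names suggest.

def second_to_last(self):
    # keep only the second-to-last transformer layer
    self.features = self.features[:, -2, :]

def last(self):
    # keep only the final transformer layer
    self.features = self.features[:, -1, :]

def sum_all(self):
    # sum token representations over all layers
    self.features = self.features.sum(axis=1)

def sum_last_n(self, n=4):
    # sum the last n layers ('sum-last-4')
    self.features = self.features[:, -n:, :].sum(axis=1)

def cat_last_n(self, n=4):
    # concatenate the last n layers along the feature axis ('cat-last-4')
    sliced = self.features[:, -n:, :]
    self.features = sliced.reshape(sliced.shape[0], -1)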
def get_features_by_track(self, track=None, idx=None, name=''):
    if idx in self.cached_tracks:
        return self.cached_tracks[idx]
    if name:
        path = ops.join(opt.visual_path, 'cached', 'tracks', self.video_idx)
        try:
            first, last, len_tr = (track[0]['frame'], track[-1]['frame'],
                                   len(track))
        except IndexError:
            # empty track: return (and cache) a zero feature vector
            track_vis = np.zeros((1, opt.visual_dim))
            if idx is not None:
                self.cached_tracks[idx] = track_vis
            return track_vis
        # sanitize the track name so it can be used in a filename
        name = '_'.join(name.split('/'))
        name = '_'.join(name.split())
        fname = '%s_track.%s.%d-%d(%d).npy' % (self.fname, name, first,
                                               last, len_tr)
        try:
            track_vis = np.load(ops.join(path, fname))
        except FileNotFoundError:
            pass
        else:
            if idx is not None:
                self.cached_tracks[idx] = track_vis
            return track_vis
    # nothing cached: pool the visual features over the track
    track_vis = self.visual.get_features_by_track(track)
    track_vis = self.f_visual(track_vis, axis=0, keepdims=True)
    if name:
        dir_check(path)
        np.save(ops.join(path, fname), track_vis)
    if idx is not None:
        self.cached_tracks[idx] = track_vis
    return track_vis
def save_embed_feat(self):
    dir_check(ops.join(opt.data, 'embed'))
    dir_check(ops.join(opt.data, 'embed', opt.subaction))
    for video in self._videos:
        video_features = self._embedded_feat[video.global_range]
        feat_name = opt.resume_str + '_%s' % video.name
        np.savetxt(ops.join(opt.data, 'embed', opt.subaction, feat_name),
                   video_features)
def update(model_name):
    opt_d = vars(opt)
    opt_d['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

    # make everything deterministic -> seed setup
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)
    np.random.seed(opt.seed)
    random.seed(opt.seed)
    torch.backends.cudnn.deterministic = True
    cudnn.benchmark = False

    # visual features
    opt_d['visual_path'] = opt.data_root + '/features/spat_i3d'
    opt_d['visual_dim'] = 2048
    opt_d['sampling_fr'] = 0.0625

    # text features
    # hyphenated name, matching the text-only update() below and the
    # identifier the BERT loader expects
    opt_d['bert_model'] = 'bert-base-uncased'
    opt_d['text_path'] = opt.data_root + '/features/bert/bert_base'
    opt_d['text_dim'] = 768
    opt_d['text_layers'] = 12

    if opt.feature_type == 'v':
        opt_d['text_dim'] = 0
    if opt.feature_type == 't':
        opt_d['visual_dim'] = 0
    opt_d['mlp_dim'] = opt_d['visual_dim'] + opt_d['text_dim']
    if opt.tracks:
        opt_d['track_dim'] = opt.visual_dim
        opt.mlp_dim = opt.mlp_dim + opt.track_dim * 2
    opt_d['model_name'] = model_name

    dir_check(opt.visual_path)

    # resolve all dataset-relative paths against the data root
    opt.dialogs_path = opt.data_root + opt.dialogs_path
    opt.frame2time_path = opt.data_root + opt.frame2time_path
    opt.labeled_interactions = opt.data_root + opt.labeled_interactions
    opt.merged_interactions = opt.data_root + opt.merged_interactions
    opt.annotations = opt.data_root + opt.annotations
    opt.split_path = opt.data_root + opt.split_path
    opt.intersected = opt.data_root + opt.intersected
    opt.relships2_15 = opt.data_root + opt.relships2_15
    opt.relships_opp = opt.data_root + opt.relships_opp
    opt.merged_videos = opt.data_root + opt.merged_videos
    opt.ftack_ids = opt.data_root + opt.ftack_ids
    opt.ftracks = opt.data_root + opt.ftracks
    opt.orig_res = opt.data_root + opt.orig_res

    sys.path.append(opt.project_root + '/moviegraphs/py3loader/')

    vars_iter = list(vars(opt))
    for arg in sorted(vars_iter):
        print('%s: %s' % (arg, getattr(opt, arg)))
def __init__(self, n=4, path=''):
    '''
    :param n: how many models to store
    :param path: root directory where the checkpoints are saved
    '''
    self.n = n
    self.eval = defaultdict(dict)
    self.models = defaultdict(dict)
    self.worst_idx = defaultdict(lambda: -1)
    self.saved = defaultdict(dict)
    self.path = path
    dir_check(path)
def save(self):
    for key in self.eval:
        dir_check(ops.join(self.path, key))
        saved = list(self.saved[key].values())
        # drop checkpoints on disk which are no longer among the kept ones
        for filename in os.listdir(ops.join(self.path, key)):
            if ops.join(self.path, key, filename) not in saved:
                os.remove(ops.join(self.path, key, filename))
        # write out every kept model that has not been saved yet
        for epoch, val in self.eval[key].items():
            path = ops.join(self.path, key,
                            'v%.4f_ep%d.pth.tar' % (val, epoch))
            if path not in saved:
                self.saved[key][epoch] = path
                torch.save(self.models[key][epoch], path)
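
# A minimal usage sketch of this saver; the check()/update() signatures are
# assumed from their call sites in the interaction training loop below, and
# validate() is a hypothetical helper returning the validation metric.

saver = ModelSaver(n=4, path=opt.store_root)
for epoch in range(opt.epochs):
    val_metric = validate(model)  # hypothetical
    if saver.check(val_metric):   # does this beat one of the n stored models?
        saver.update(val_metric,
                     {'epoch': epoch,
                      'state_dict': copy.deepcopy(model.state_dict()),
                      'optimizer': copy.deepcopy(optimizer.state_dict())},
                     epoch)
saver.save()  # sync the kept checkpoints to disk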
def update():
    args_map = {
        'text_features': '',
        'contextualization': '',
        'lr': 'lr',
        'epochs': 'ep',
        'batch_size': 'bs',
        'model_name': '',
        'log_prefix': '',
        'mlp_dim': 'dim',
        'inter_class': 'ic',
        'feature_type': 'ft_',
        'pool_features': 'pf_'
    }
    # default='/sequoia/data2/akukleva/moviegraph/features/bert'
    opt_d = vars(opt)
    opt_d['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

    if opt.text_features == 'bert_base':
        opt_d['bert_model'] = 'bert-base-uncased'
        if ops.isdir('/sequoia/data2/'):
            opt_d['text_path'] = '/sequoia/data2/akukleva/moviegraph/features/bert/bert_base'
        else:
            opt_d['text_path'] = '/sequoia/data1/akukleva/projects/inter_recog/moviegraphs/bert_base'
        opt_d['text_dim'] = 768
        opt_d['text_layers'] = 12
    if opt.text_features == 'bert_large':
        opt_d['bert_model'] = ''
        opt_d['text_path'] = '/sequoia/data1/akukleva/projects/inter_recog/moviegrophs/bert_large'
        opt_d['text_dim'] = 1024
        opt_d['text_layers'] = 24

    opt_d['mlp_dim'] = opt_d['text_dim']
    opt_d['model_name'] = 'mlp_text'

    dir_check(opt.text_path)

    vars_iter = list(vars(opt))
    for arg in sorted(vars_iter):
        print('%s: %s' % (arg, getattr(opt, arg)))
def get_features_by_time(self, time_node=None, idx=None):
    '''Given a time range, return the corresponding features.'''
    if idx in self.cached:
        return self.cached[idx]
    path = ops.join(opt.visual_path, 'cached', 'time',
                    '%s' % opt.feature_type, self.video_idx)
    str_time_node = '_'.join(str(time_node).split())
    fname = '%s_time_%s.npy' % (self.fname, str_time_node)
    try:
        features = np.load(ops.join(path, fname))
    except FileNotFoundError:
        pass
    else:
        if idx is not None:
            self.cached[idx] = features
        return features
    features = None
    # feature types: 'v' visual only, 't' text only, 'm' both modalities
    if opt.feature_type in ['m', 'v']:
        features = self.f_visual(
            self.visual.get_features_by_time(time_node),
            axis=0, keepdims=True)
        if not opt.spat_pool:
            # average out the spatial dimensions of the feature map
            shape = features.shape
            assert shape[0] == 1
            features = np.mean(features.reshape((shape[0], shape[1], -1)),
                               axis=2)
    if opt.feature_type in ['m', 't']:
        textual = self.f_text(self.textual.get_features_by_time(time_node),
                              axis=0).reshape(1, -1)
        features = join_data(textual, features, np.hstack)
    dir_check(path)
    np.save(ops.join(path, fname), features)
    if idx is not None:
        self.cached[idx] = features
    return features
def __init__(self, subaction='coffee'):
    """
    Args:
        subaction: name of the current complex activity
    """
    np.random.seed(opt.seed)
    self.gt_map = GroundTruth(frequency=opt.frame_frequency)
    self.gt_map.load_mapping()
    self._K = self.gt_map.define_K(subaction=subaction)
    logger.debug('%s subactions: %d' % (subaction, self._K))
    self.iter = 0
    self.return_stat = {}

    self._acc_old = 0
    self._videos = []
    self._subaction = subaction
    # init with ones for consistency with first measurement of MoF
    self._subact_counter = np.ones(self._K)
    self._gaussians = {}
    self._inv_count_stat = np.zeros(self._K)
    self._embedding = None
    self._gt2label = None
    self._label2gt = {}

    self._with_bg = opt.bg
    self._total_fg_mask = None

    # multiprocessing for sampling activities for each video
    self._features = None
    self._embedded_feat = None
    self._init_videos()
    logger.debug('min: %f max: %f avg: %f' %
                 (np.min(self._features), np.max(self._features),
                  np.mean(self._features)))

    # to save segmentation of the videos
    dir_check(os.path.join(opt.data, 'segmentation'))
    dir_check(os.path.join(opt.data, 'likelihood'))
    self.vis = None  # visualization tool
def plot(self, iter=0, show=True, prefix=''):
    if iter is not None:
        self._counter = iter
    if 20 in self._labels:
        # remap label 20 to 10 to keep the colormap compact
        self._labels = np.array(self._labels)
        mask = self._labels == 20
        self._labels[mask] = 10
    plt.axis('off')
    plt.scatter(self._result[..., 0], self._result[..., 1],
                c=self._labels, s=self._sizes, alpha=1)
    plt.grid(True)
    if prefix == 'time_':
        plt.colorbar()
    if self._save:
        dir_check(join(opt.dataset_root, 'plots'))
        dir_check(join(opt.dataset_root, 'plots', opt.subaction))
        name = prefix + '%s_%s_' % (opt.subaction, opt.model_name)
        folder_name = opt.log_str
        dir_check(join(opt.dataset_root, 'plots', opt.subaction,
                       folder_name))
        folder_name = join(opt.log_str, opt.vis_mode)
        dir_check(join(opt.dataset_root, 'plots', opt.subaction,
                       folder_name))
        if self.svg:
            name += '_%s.svg' % self._mode
        else:
            name += '_%s.png' % self._mode
        plt.savefig(join(opt.dataset_root, 'plots', opt.subaction,
                         folder_name, name),
                    transparent=True, dpi=300)
        np.savetxt(join(opt.dataset_root, 'plots', opt.subaction,
                        folder_name, '%s.txt' % opt.vis_mode),
                   self._result)
    if show:
        plt.show()
def save_from_data2_to_data1():
    if 'data2' not in opt.text_path:
        raise FileNotFoundError('Check location of the features')
    p_in = '/sequoia/data1/akukleva/projects/inter_recog/moviegraphs/bert_base'
    # f_dataloader(mode='train')
    f_dataloader(mode='val')
    # f_dataloader(mode='test')
    feature_dir_path = ops.join(p_in, opt.contextualization)
    dir_check(feature_dir_path)
    for root, dirs, files in os.walk(
            ops.join(opt.text_path, opt.contextualization)):
        for dirname in dirs:
            dir_check(ops.join(feature_dir_path, dirname))
        for filename in files:
            if filename.endswith('npy'):
                dirname = ops.join(feature_dir_path, root.split('/')[-1])
                dir_check(dirname)
                shutil.copy(ops.join(root, filename),
                            ops.join(dirname, filename))
def save_likelihood(self):
    """Used for multiprocessing."""
    dir_check(os.path.join(opt.data, 'likelihood'))
    np.savetxt(os.path.join(opt.data, 'likelihood', self.name),
               self._likelihood_grid)
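
# The docstring above says this runs under multiprocessing; below is a
# minimal sketch of how the call might be fanned out with the standard
# library. The `videos` list is hypothetical, and real video objects would
# need to be picklable for this to work.
import multiprocessing as mp

def _save_one(video):
    # each worker writes its own likelihood grid under opt.data/likelihood/
    video.save_likelihood()

with mp.Pool(processes=4) as pool:
    pool.map(_save_one, videos)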
def save_obj(obj, name):
    dir_check(os.path.join(opt.gt, 'mapping'))
    path = os.path.join(opt.gt, 'mapping', '%s.pkl' % name)
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
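
# A matching loader is the natural counterpart; this load_obj is a sketch
# (not shown in the source) that simply inverts save_obj.
def load_obj(name):
    path = os.path.join(opt.gt, 'mapping', '%s.pkl' % name)
    with open(path, 'rb') as f:
        return pickle.load(f)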
def training(train_loader, epochs, save, **kwargs):
    """Training pipeline for embedding.

    Args:
        train_loader: iterator over the dataset
        epochs: how many training epochs to perform
        save: whether to store the trained model on disk
        kwargs: must provide 'model', 'loss', 'optimizer' and 'name'
    Returns:
        trained pytorch model
    """
    logger.debug('create model')

    # make everything deterministic -> seed setup
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)
    np.random.seed(opt.seed)
    random.seed(opt.seed)
    torch.backends.cudnn.deterministic = True

    model = kwargs['model']
    loss = kwargs['loss']
    optimizer = kwargs['optimizer']

    cudnn.benchmark = True

    batch_time = Averaging()
    data_time = Averaging()
    losses = Averaging()

    adjustable_lr = opt.lr

    logger.debug('epochs: %s', epochs)
    for epoch in range(epochs):
        model.to(opt.device)
        model.train()

        logger.debug('Epoch # %d' % epoch)
        if opt.lr_adj:
            # decay the learning rate every 30 epochs
            if epoch % 30 == 0 and epoch > 0:
                adjustable_lr = adjust_lr(optimizer, adjustable_lr)
                logger.debug('lr: %f' % adjustable_lr)
        end = time.time()
        for i, (features, labels) in enumerate(train_loader):
            data_time.update(time.time() - end)
            features = features.float()
            labels = labels.float().to(opt.device)
            if opt.device == 'cuda':
                features = features.cuda(non_blocking=True)

            output = model(features)
            loss_values = loss(output, labels)
            losses.update(loss_values.item(), features.size(0))

            optimizer.zero_grad()
            loss_values.backward()
            optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()

            if i % 100 == 0 and i:
                logger.debug(
                    'Epoch: [{0}][{1}/{2}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                        epoch, i, len(train_loader),
                        batch_time=batch_time, data_time=data_time,
                        loss=losses))
        logger.debug('loss: %f' % losses.avg)
        losses.reset()

    opt.resume_str = join(opt.dataset_root, 'models', kwargs['name'],
                          '%s.pth.tar' % opt.log_str)
    if save:
        save_dict = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        dir_check(join(opt.dataset_root, 'models'))
        dir_check(join(opt.dataset_root, 'models', kwargs['name']))
        torch.save(save_dict, opt.resume_str)
    return model
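
# A minimal sketch of how this pipeline might be invoked; the embedding
# model, loss and optimizer below are illustrative assumptions (input_dim
# and embed_dim are hypothetical options), not the setup used in the source.
model = torch.nn.Linear(opt.input_dim, opt.embed_dim).to(opt.device)
loss = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
model = training(train_loader, epochs=opt.epochs, save=True,
                 model=model, loss=loss, optimizer=optimizer, name='embed')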
def clip_name(root, filename):
    clip = re.search(r'(tt\d*)', root).group(1)
    scene = re.search(r'scene-(\d*)\.', filename).group(1)
    dir_check(ops.join(opt.text_path, clip))
    return clip + '_' + scene
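
# Example with a hypothetical movie id, derived from the two regexes above:
# for root '.../tt0111161' and filename 'scene-042.txt',
#   clip_name(root, filename) -> 'tt0111161_042'
# (as a side effect, opt.text_path/tt0111161 is created if missing).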
def accuracy_corpus(self, prefix=''):
    """Calculate metrics, including ones based on the previous
    correspondences between gt labels and output labels."""
    accuracy = Accuracy()
    f1_score = F1Score(K=self._K, n_videos=len(self._videos))
    long_gt = []
    long_pr = []
    long_rel_time = []
    self.return_stat = {}

    for video in self._videos:
        long_gt += list(video.gt)
        long_pr += list(video._z)
        try:
            long_rel_time += list(video.temp)
        except AttributeError:
            pass  # no poses

    accuracy.gt_labels = long_gt
    accuracy.predicted_labels = long_pr
    if opt.bg:
        # enforce bg class to be bg class
        accuracy.exclude[-1] = [-1]

    old_mof, total_fr = accuracy.mof(old_gt2label=self._gt2label)
    self._gt2label = accuracy._gt2cluster
    self._label2gt = {}
    for key, val in self._gt2label.items():
        try:
            self._label2gt[val[0]] = key
        except IndexError:
            pass
    acc_cur = accuracy.mof_val()
    logger.debug('%sAction: %s' % (prefix, self._subaction))
    logger.debug('%sMoF val: ' % prefix + str(acc_cur))
    logger.debug('%sprevious dic -> MoF val: ' % prefix +
                 str(float(old_mof) / total_fr))

    accuracy.mof_classes()
    accuracy.iou_classes()

    self.return_stat = accuracy.stat()

    f1_score.set_gt(long_gt)
    f1_score.set_pr(long_pr)
    f1_score.set_gt2pr(self._gt2label)
    if opt.bg:
        f1_score.set_exclude(-1)
    f1_score.f1()

    for key, val in f1_score.stat().items():
        self.return_stat[key] = val

    for video in self._videos:
        video.segmentation[video.iter] = (video._z, self._label2gt)

    if opt.vis:
        # VISUALISATION
        if opt.vis_mode != 'segm':
            long_pr = [self._label2gt[i] for i in long_pr]
            if self.vis is None:
                self.vis = Visual(mode=opt.vis_mode, save=True, reduce=None)
                self.vis.fit(self._embedded_feat, long_pr,
                             'iter_%d' % self.iter)
            else:
                reset = prefix == 'final'
                self.vis.color(labels=long_pr, prefix='iter_%d' % self.iter,
                               reset=reset)
        else:
            # visualisation of segmentation
            if prefix == 'final':
                colors = {}
                cmap = plt.get_cmap('tab20')
                for label_idx, label in enumerate(np.unique(long_gt)):
                    if label == -1:
                        colors[label] = (0, 0, 0)
                    else:
                        colors[label] = cmap(label_idx /
                                             len(np.unique(long_gt)))
                dir_check(os.path.join(opt.dataset_root, 'plots'))
                dir_check(os.path.join(opt.dataset_root, 'plots',
                                       opt.subaction))
                fold_path = os.path.join(opt.dataset_root, 'plots',
                                         opt.subaction, 'segmentation')
                dir_check(fold_path)
                for video in self._videos:
                    path = os.path.join(fold_path, video.name + '.png')
                    name = video.name.split('_')
                    name = '_'.join(name[-2:])
                    plot_segm(path, video.segmentation, colors, name=name)
    return accuracy.frames()
""" From one hot encoding labeling to my format of gt """ __author__ = 'Anna Kukleva' __date__ = 'September 2018' import os import re import numpy as np from utils.arg_pars import logger, opt from utils.util_functions import dir_check actions = ['coffee', 'changing_tire', 'cpr', 'jump_car', 'repot'] gt_folder = '/media/data/kukleva/lab/YTInstructions/VISION_txt_annot' dir_check(opt.gt) label2idx = {} idx2label = {} videos = {} for root, dirs, files in os.walk(gt_folder): for filename in files: segmentation = [] with open(os.path.join(root, filename), 'r') as f: for line in f: line = line.split() line = list(map(lambda x: int(x), line)) label = -1 if line[-1] == 1 else np.where(line)[0][0] if label != -1:
def training(train_dataset, **kwargs):
    train_start_time = datetime.now().strftime('%Y%m%d-%H%M%S')
    print('set parameters and model, train start time: %s' % train_start_time)

    model = kwargs['model']
    loss = kwargs['loss']
    optimizer = kwargs['optimizer']

    batch_time = Averaging()
    data_time = Averaging()
    losses = Averaging()

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=opt.batch_size, shuffle=True,
        num_workers=opt.num_workers, drop_last=False)

    print('epochs: %s' % opt.epochs)
    model_saver_val = ModelSaver(path=opt.store_root)
    for epoch in range(opt.epochs):
        model.to(opt.device)
        model.train()
        train_dataset.epoch = epoch
        print('Epoch # %d' % epoch)

        end = time.time()
        counter = 0
        if opt.tr_sum_max:
            if epoch == 20:
                opt.tr_sum_max_flag = True
        for i, input in enumerate(train_dataloader):
            data_time.update(time.time() - end)
            labels = input['labels']
            if len(labels) == 1:
                # skip batches of size one
                continue
            output = model(input)
            loss_values = loss(output, input)
            losses.update(loss_values.item(), len(labels))

            optimizer.zero_grad()
            loss_values.backward()
            optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()
            counter += len(labels)
            if i % 10 == 0 and i:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                          epoch, i, len(train_dataloader),
                          batch_time=batch_time, data_time=data_time,
                          loss=losses))
        print(counter)
        print('loss: %f' % losses.avg)
        losses.reset()

        if epoch % opt.test_fr == 0:
            testing(train_dataset, model, loss, total_iter=epoch,
                    mode='train', train_start_time=train_start_time)
        if opt.test:
            check_val = testing(kwargs['val_dataset'], model, loss,
                                total_iter=epoch,
                                train_start_time=train_start_time,
                                mode='val')
            # keep the checkpoint if it is among the best on validation
            if model_saver_val.check(check_val):
                save_dict = {
                    'epoch': epoch,
                    'state_dict': copy.deepcopy(model.state_dict()),
                    'optimizer': copy.deepcopy(optimizer.state_dict())
                }
                model_saver_val.update(check_val, save_dict, epoch)
            testing(kwargs['test_dataset'], model, loss, total_iter=epoch,
                    train_start_time=train_start_time, mode='test')
        print(opt.log_prefix)
        if opt.save_model and opt.save_model_often and epoch % 30 == 0:
            model_saver_val.save()

    check_str = join(opt.store_root)
    opt.resume_str = join(check_str, '%d.pth.tar' % epoch)
    if opt.save_model:
        save_dict = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        dir_check(check_str)
        torch.save(save_dict, opt.resume_str)
    return model