def train():
    """Fine-tune an ImageNet-pretrained ResNet-18 as a binary soccer classifier."""
    logger.debug('.')
    net = models.resnet18(pretrained=True)
    # replace the 1000-way ImageNet head with a 2-way (soccer / not soccer) head
    net.fc = nn.Linear(net.fc.in_features, 2)
    net = net.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    adam = torch.optim.Adam(net.parameters(),
                            lr=opt.lr,
                            weight_decay=opt.weight_decay)
    # standard ImageNet augmentation + normalization
    augment = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    dataset = SoccerDataset(transform=augment)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=True,
                                             num_workers=opt.num_workers)
    training(dataloader, model=net, loss=criterion, optimizer=adam)
def test_img(path):
    """Classify a single image file as soccer / not soccer with a trained ResNet-18.

    Args:
        path: filesystem path to the image to classify.
    """
    logger_setup()
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    img = preprocess(Image.open(path))
    # prepend a batch dimension before the forward pass
    batch = torch.Tensor(img.numpy()[np.newaxis, ...])
    net = models.resnet18()
    net.fc = nn.Linear(net.fc.in_features, 2)
    net.load_state_dict(load_model(epoch=9))
    net.eval()
    with torch.no_grad():
        logits = net(batch).numpy()
    # softmax over the two logits
    probs = np.exp(logits) / np.sum(np.exp(logits))
    logger.debug(str(probs))
    idx = np.argmax(probs)
    logger.debug('prediction: %s' % ['not soccer', 'soccer'][idx])
def __init__(self, videos, features):
    """Base dataset holding the video collection and their frame features."""
    logger.debug('Creating feature dataset')
    self._videos = videos
    self._features = features
    # per-frame targets; subclasses stack their labels into this
    self._gt = None
def gaussian_model(self):
    """Fit one Gaussian Mixture Model over the whole dataset and score every video.

    With opt.bg enabled, additionally derives a per-action background threshold
    from the opt.bg_trh percentile of the pooled likelihood scores and pushes
    the updated thresholds back into each video.
    """
    logger.debug('Fit Gaussian Mixture Model to the whole dataset at once')
    self._gaussians_fit()
    # likelihood grid per video against the fitted gaussians
    for video_idx in range(len(self._videos)):
        self._video_likelihood_grid(video_idx)
    if opt.bg:
        # pool likelihood scores of all videos to pick a percentile threshold
        scores = None
        for video in self._videos:
            scores = join_data(scores, video.get_likelihood(), np.vstack)
        # score value at the opt.bg_trh percentile (per action column)
        bg_trh_score = np.sort(scores, axis=0)[int(
            (opt.bg_trh / 100) * scores.shape[0])]
        bg_trh_set = []
        for action_idx in range(self._K):
            # threshold is the gap between the gaussian's mean score and the
            # percentile score for that action
            new_bg_trh = self._gaussians[
                action_idx].mean_score - bg_trh_score[action_idx]
            self._gaussians[action_idx].update_trh(new_bg_trh=new_bg_trh)
            bg_trh_set.append(new_bg_trh)
        logger.debug('new bg_trh: %s' % str(bg_trh_set))
        # NOTE(review): collapsed source is ambiguous on whether this block is
        # guarded by opt.bg — placed inside since the thresholds only change
        # here; confirm against the original file.
        trh_set = []
        for action_idx in range(self._K):
            trh_set.append(self._gaussians[action_idx].trh)
        for video in self._videos:
            video.valid_likelihood_update(trh_set)
def save_mp(config, folder_probs):
    """Forward the test set through a trained model and dump probabilities (multiprocess)."""
    logger.debug('Multiprocessing: forward data and save probabilities')
    test_data = TestDataset(config)
    model, loss, optimizer = mlp.create_model(config, n_classes=513)
    state = load_model(config, epoch=config["test_epoch"])
    model.load_state_dict(state)
    saver = SaveProbsMP(config, model, test_data, folder_probs)
    saver.save_probs_mp()
def __init__(self, transform=None):
    """Build positives from sampled soccer-video frames, negatives from PASCAL images.

    Args:
        transform: optional torchvision transform applied per image.
    """
    np.random.seed(opt.seed)
    self.transform = transform
    self.soccer_folders = opt.save_folder
    self.pascal = opt.pascal
    self.valid_frames = valid_frame_parser()
    self.image_pathes = []
    self.labels = []
    # positives (label 1): subsample opt.sample_rate of each video's valid frames
    for video_name in self.valid_frames:
        frames = self.valid_frames[video_name]
        n_sampled = len(frames) * opt.sample_rate
        chosen = np.random.choice(frames, int(n_sampled), replace=False)
        video_dir = os.path.join(self.soccer_folders, video_name)
        for frame_idx in chosen:
            self.image_pathes.append(os.path.join(video_dir, '%d.png' % frame_idx))
            self.labels.append(1)
    logger.debug('%d soccers' % len(self.image_pathes))
    # negatives (label 0): capped number of PASCAL images
    for file_idx, filename in enumerate(os.listdir(self.pascal)):
        self.image_pathes.append(os.path.join(self.pascal, filename))
        self.labels.append(0)
        if file_idx > 7000:
            break
    logger.debug('%d with pascal' % len(self.image_pathes))
def update_opt_str():
    """Compose opt.log_str from selected option values and log every option.

    Booleans become 'name' / '!name', strings keep their first '_'-separated
    token, everything else becomes 'name<value>'; tokens are '_'-joined.
    """
    tracked = ['prefix', 'subaction'] + sorted([
        'dataset', 'full', 'epochs', 'embed_dim', 'data_type', 'ordering',
        'gmm', 'gmms', 'gt_training', 'lr', 'lr_adj', 'zeros', 'bg',
        'viterbi', 'reg_cov'
    ])
    log_str = ''
    for name in tracked:
        value = getattr(opt, name)
        short = name.split('_')[0]
        if isinstance(value, bool):
            token = short if value else '!' + short
        elif isinstance(value, str):
            token = value.split('_')[0]
        else:
            token = '%s%s' % (short, str(value))
        log_str += '%s_' % token
    opt.log_str = log_str
    # dump the full option set for reproducibility
    for name in sorted(list(vars(opt))):
        logger.debug('%s: %s' % (name, getattr(opt, name)))
def update_opt_str(config):
    """Compose config["log_str"] from abbreviated option values and log the config."""
    abbrev = {
        'epochs': 'ep',
        'embed_dim': 'dim',
        'lr': 'lr',
        'act_func': '',
        'init_mean': 'im',
        'init_var': 'iv',
        'bias': 'bias'
    }
    ordered = ['prefix'] + sorted(abbrev)
    abbrev['prefix'] = ''
    abbrev['subaction'] = ''
    tokens = []
    for key in ordered:
        value = config[key]
        short = abbrev[key]
        if isinstance(value, bool):
            token = short if value else '!' + short
        else:
            token = '%s%s' % (short, str(value))
        tokens.append('%s_' % token)
    config["log_str"] = ''.join(tokens)
    # dump the full config for reproducibility
    for key in config:
        logger.debug('%s: %s' % (key, config[key]))
def load_data(root_dir, end, subaction, videos=None, names=None, features=None):
    """Create dataloader within given conditions.

    Args:
        root_dir: path to root directory with features
        end: extension of files
        subaction: complex activity
        videos: collection of objects of class Video
        names: empty list as input to have an opportunity to return a
            dictionary with correspondences between names and indices
        features: features for the whole video collection
    Returns:
        iterative dataloader
        number of subactions in current complex activity
    """
    logger.debug('create DataLoader')
    dataset = FeatureDataset(root_dir, end, subaction,
                             videos=videos, features=features)
    # caller passed a mutable list -> report the name/index mapping through it
    if names is not None:
        names[0] = dataset.index2name()
    # shuffling is disabled when embedded features are being saved, so the
    # output order matches the dataset order
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=(not opt.save_embed_feat),
                                             num_workers=opt.num_workers)
    return dataloader, dataset.n_subact()
def wrap(*args, **kwargs):
    """Call the wrapped function ``f`` (closure variable) and log its wall-clock runtime."""
    start = time.time()
    result = f(*args, **kwargs)
    elapsed = time.time() - start
    logger.debug('%s took %0.3f ms ~ %0.3f min ~ %0.3f sec'
                 % (f, elapsed * 1000.0, elapsed / 60.0, elapsed))
    return result
def load_ground_truth(videos, features, shuffle=True):
    """Wrap videos+features in a GTDataset and return the corresponding DataLoader."""
    logger.debug('load data with ground truth labels for training some embedding')
    gt_dataset = GTDataset(videos, features)
    return torch.utils.data.DataLoader(gt_dataset,
                                       batch_size=opt.batch_size,
                                       shuffle=shuffle,
                                       num_workers=opt.num_workers)
def _create_voting_table(self):
    """Filling table with assignment scores.

    Create table which represents paired label assignments, i.e. each cell
    comprises score for corresponding label assignment.
    """
    # the table is square: side = max of #gt labels and #predicted labels
    size = max(len(np.unique(self._gt_labels_subset)),
               len(np.unique(self._predicted_labels)))
    self._voting_table = np.zeros((size, size))
    # map each ground-truth label to a row index (and back)
    for idx_gt, gt_label in enumerate(np.unique(self._gt_labels_subset)):
        self._gt_label2index[gt_label] = idx_gt
        self._gt_index2label[idx_gt] = gt_label
    if len(self._gt_label2index) < size:
        # pad with synthetic gt labels (not colliding with real ones) so the
        # mapping covers all rows of the square table
        for idx_gt in range(len(np.unique(self._gt_labels_subset)), size):
            gt_label = idx_gt
            while gt_label in self._gt_label2index:
                gt_label += 1
            self._gt_label2index[gt_label] = idx_gt
            self._gt_index2label[idx_gt] = gt_label
    # same mapping for predicted labels -> column indices
    for idx_pr, pr_label in enumerate(np.unique(self._predicted_labels)):
        self._pr_label2index[pr_label] = idx_pr
        self._pr_index2label[idx_pr] = pr_label
    if len(self._pr_label2index) < size:
        for idx_pr in range(len(np.unique(self._predicted_labels)), size):
            pr_label = idx_pr
            while pr_label in self._pr_label2index:
                pr_label += 1
            self._pr_label2index[pr_label] = idx_pr
            self._pr_index2label[idx_pr] = pr_label
    # fill each cell with the co-occurrence count of (gt label, predicted
    # label); labels listed in self.exclude are skipped
    for idx_gt, gt_label in enumerate(np.unique(self._gt_labels_subset)):
        if gt_label in list(self.exclude.keys()):
            continue
        gt_mask = self._gt_labels_subset == gt_label
        for idx_pr, pr_label in enumerate(np.unique(
                self._predicted_labels)):
            if pr_label in list(self.exclude.values()):
                continue
            self._voting_table[idx_gt, idx_pr] = \
                np.sum(self._predicted_labels[gt_mask] == pr_label,
                       dtype=float)
    for key, val in self.exclude.items():
        # works only if one pair in exclude
        assert len(self.exclude) == 1
        try:
            # pin the excluded pair together by giving it the dominant score
            self._voting_table[
                self._gt_label2index[key],
                self._pr_label2index[val[0]]] = size * np.max(
                    self._voting_table)
        except KeyError:
            # excluded predicted label never occurred: route it to the last
            # column and register the mapping for it
            logger.debug('No background!')
            self._voting_table[self._gt_label2index[key],
                               -1] = size * np.max(self._voting_table)
            self._pr_index2label[size - 1] = val[0]
            self._pr_label2index[val[0]] = size - 1
def save_probs_mp(self, n_threads=1):
    """Spawn worker processes that each drain the probability-saving queue.

    Args:
        n_threads: number of worker processes to start.
    """
    logger.debug('.')
    workers = []
    for _ in range(n_threads):
        worker = mp.Process(target=self.save_probs_queue)
        worker.start()
        workers.append(worker)
    # wait for every worker to finish
    for worker in workers:
        worker.join()
def _update_fg_mask(self):
    """Recompute the global foreground mask over all frames of all videos."""
    logger.debug('.')
    n_frames = len(self._features)
    if not self._with_bg:
        # no background modelling: every frame counts as foreground
        self._total_fg_mask = np.ones(n_frames, dtype=bool)
    else:
        self._total_fg_mask = np.zeros(n_frames, dtype=bool)
        for video in self._videos:
            # translate the video-local fg mask into global frame indices
            global_idxs = np.nonzero(video.global_range)[0]
            self._total_fg_mask[global_idxs[video.fg_mask]] = True
def resume_segmentation(iterations=10):
    """Resume a saved segmentation for several iterations, reporting accuracy each time."""
    logger.debug('Resume segmentation')
    corpus = Corpus(Q=opt.gmm, subaction=opt.subaction)
    for it in range(iterations):
        logger.debug('Iteration %d' % it)
        corpus.iter = it
        corpus.resume_segmentation()
        corpus.accuracy_corpus()
    # final accuracy after the last iteration
    corpus.accuracy_corpus()
def f1(self):
    """Run the sampling experiments and fill self._return with precision/recall/f1 stats."""
    self._finish_init()
    for _ in range(self.n_experiments):
        self._sampling()
    f1_mean = np.mean(self.f1_scores)
    logger.debug('f1 score: %f' % f1_mean)
    # average correctly matched segments over all experiments
    self._n_true_seg_all /= self.n_experiments
    self._return['precision'] = [self._n_true_seg_all,
                                 self._K * self._n_videos]
    self._return['recall'] = [self._n_true_seg_all, len(self.bound_masks)]
    self._return['mean_f1'] = [f1_mean, 1]
def __init__(self, videos, features):
    """Dataset whose targets are the per-frame ground-truth labels of each video."""
    logger.debug('Ground Truth labels')
    super().__init__(videos, features)
    # stack each video's ground-truth label column into one array
    for video in self._videos:
        labels = np.asarray(video.gt).reshape((-1, 1))
        self._gt = join_data(self._gt, labels, np.vstack)
def _tmp_read(self):
    """Re-assemble self.features from numbered .npy chunks, deleting each chunk after use."""
    del self.features
    self.features = None
    tmp_dir = ops.join(self.config["dataset_root"], self.tmp)
    # chunk files are named '<int>.npy'; process them in numeric order
    chunk_ids = sorted(int(name.split('.')[0]) for name in os.listdir(tmp_dir))
    for chunk_id in chunk_ids:
        logger.debug(chunk_id)
        chunk_path = ops.join(tmp_dir, '%d.npy' % chunk_id)
        self.features = join_data(self.features, np.load(chunk_path), np.vstack)
        os.remove(chunk_path)
def load_model(epoch=None):
    """Load a checkpoint's state_dict for the current run.

    Args:
        epoch: checkpoint epoch to load; defaults to opt.epochs.
    Returns:
        the 'state_dict' stored in the checkpoint file.
    """
    if opt.resume_str:
        resume_str = opt.resume_str
    else:
        resume_str = opt.log_str
    epoch = opt.epochs if epoch is None else epoch
    checkpoint = torch.load(
        ops.join(opt.dataset_root, 'models',
                 '%s%d.pth.tar' % (resume_str, epoch)))
    checkpoint = checkpoint['state_dict']
    # bug fix: previously logged opt.epochs even when a different epoch was
    # actually loaded above
    logger.debug('loaded model: ' + '%s%d.pth.tar' % (resume_str, epoch))
    return checkpoint
def __init__(self, videos, features):
    # Dataset whose targets are the per-frame relative-time labels of each video.
    logger.debug('Relative time labels')
    super().__init__(videos, features)
    temp_features = None  # used only if opt.concat > 1
    for video in self._videos:
        # one relative-time value per frame, as a column vector
        time_label = np.asarray(video.temp).reshape((-1, 1))
        video_features = self._features[video.global_range]
        # NOTE(review): temp_features is accumulated but never stored on self
        # or returned — looks dead unless the opt.concat > 1 path lives in code
        # not visible here; confirm before removing.
        temp_features = join_data(temp_features, video_features, np.vstack)
        self._gt = join_data(self._gt, time_label, np.vstack)
def load_model(name=None):
    """Load a saved state_dict from <dataset_root>/models/<name>/<resume_str>.pth.tar."""
    if opt.resume_str:
        # resume string is a template parametrised by the subaction prefix
        subaction = opt.subaction.split('_')[0]
        resume_str = opt.resume_str % subaction
    else:
        resume_str = opt.log_str
    ckpt_path = join(opt.dataset_root, 'models', name,
                     '%s.pth.tar' % resume_str)
    state = torch.load(ckpt_path)['state_dict']
    logger.debug('loaded model: ' + '%s.pth.tar' % resume_str)
    return state
def __init__(self, config):
    """Collect paths and names of all test feature files under dataset_root/test_feat."""
    self.config = config
    self.features = None
    self.pathes = []
    self.names = []
    self.counter = 0
    feat_dir = ops.join(self.config["dataset_root"], self.config["test_feat"])
    for filename in os.listdir(feat_dir):
        self.pathes.append(ops.join(feat_dir, filename))
        self.names.append(filename)
    logger.debug(' %d videos' % len(self.names))
def load_reltime(videos, features, shuffle=True):
    """Build a DataLoader with relative-time labels as ground truth.

    Args:
        videos: collection of Video objects
        features: features for the whole video collection
        shuffle: whether the loader shuffles samples
    Returns:
        torch DataLoader over the model-specific dataset
    Raises:
        ValueError: if opt.model_name is neither 'mlp' nor 'tcn' (previously
            this fell through and crashed later with a NameError on `dataset`).
    """
    logger.debug('load data with temporal labels as ground truth')
    torch.manual_seed(opt.seed)
    np.random.seed(opt.seed)
    if opt.model_name == 'mlp':
        dataset = RelTimeDataset(videos, features)
    elif opt.model_name == 'tcn':
        dataset = TCNDataset(videos, features)
    else:
        raise ValueError('unknown model name: %s' % opt.model_name)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=shuffle,
                                             num_workers=opt.num_workers)
    return dataloader
def save_probs(config, dataloader, model):
    """Run the model over the dataloader on CPU and save one probability array per item."""
    dir_check(ops.join(config["dataset_root"], config["out_probs"]))
    logger.debug('forward data and save probabilities')
    torch.manual_seed(config["seed"])
    model.eval()
    model.cpu()  # force CPU inference (original note: not sure that need it)
    with torch.no_grad():
        for idx, (features, name) in enumerate(dataloader):
            probs = model(features).numpy()
            out_path = ops.join(config["dataset_root"], config["out_probs"],
                                name)
            np.save(out_path, probs)
def viterbi_decoding(self):
    """Viterbi-decode every video, logging progress and newly seen subaction orderings."""
    logger.debug('.')
    self._count_subact()
    seen_orders = []
    for video_idx, video in enumerate(self._videos):
        if video_idx % 20 == 0:
            # periodic progress report with current subaction counts
            logger.debug('%d / %d' % (video_idx, len(self._videos)))
            self._count_subact()
            logger.debug(str(self._subact_counter))
        video.viterbi()
        order = list(video._pi)
        if order not in seen_orders:
            logger.debug(str(order))
            seen_orders.append(order)
    self._count_subact()
    logger.debug(str(self._subact_counter))
def load_model(config, epoch=None):
    """Load a checkpoint state_dict from config["model_folder"] for the given epoch."""
    if config["resume_str"]:
        resume_str = config["resume_str"]
        if resume_str.endswith('.pth.tar'):
            # strip the trailing '<epoch>.pth.tar' to recover the name prefix
            match = re.search(r'(.*_)\d*.pth.tar', resume_str)
            resume_str = match.group(1)
    else:
        resume_str = config["log_str"]
    epoch = config["epochs"] if epoch is None else epoch
    ckpt_name = '%s%d.pth.tar' % (resume_str, epoch)
    logger.debug('Loading model from: %s'
                 % ops.join(config["model_folder"], ckpt_name))
    state = torch.load(ops.join(config["model_folder"],
                                ckpt_name))['state_dict']
    logger.debug('loaded model: ' + ckpt_name)
    return state
def parse_return_stat(stat):
    """Log the final metrics accumulated in `stat`.

    Each entry of `stat` is a [numerator, denominator] pair. NOTE: 'f1' is not
    in `keys`, so the precision/recall branch is currently unreachable unless
    'f1' is added to the reported keys.
    """
    keys = ['mof', 'mof_bg', 'iou', 'iou_bg']
    for key in keys:
        if key == 'f1':
            _eps = 1e-8
            n_tr_seg, n_seg = stat['precision']
            precision = n_tr_seg / n_seg
            # bug fix: recall previously computed n_tr_seg / n_tr_seg, which is
            # always 1.0; divide matched segments by the total segment count
            n_tr_seg, n_seg = stat['recall']
            recall = n_tr_seg / n_seg
            val = 2 * (precision * recall) / (precision + recall + _eps)
        else:
            v1, v2 = stat[key]
            if key == 'iou_bg':
                v2 += 1  # bg class
            val = v1 / v2
        logger.debug('%s: %f' % (key, val))
def load_model(name='mlp'):
    """Load a state_dict, mapping tensors to CPU when opt.device == 'cpu'."""
    if opt.resume_str:
        # resume string is a template parametrised by the subaction prefix
        subaction = opt.subaction.split('_')[0]
        resume_str = opt.resume_str % subaction
    else:
        resume_str = opt.log_str
    opt.resume_str = resume_str
    ckpt_path = join(opt.dataset_root, 'models', name,
                     '%s.pth.tar' % resume_str)
    if opt.device == 'cpu':
        checkpoint = torch.load(ckpt_path, map_location='cpu')
    else:
        checkpoint = torch.load(ckpt_path)
    logger.debug('loaded model: ' + '%s.pth.tar' % resume_str)
    return checkpoint['state_dict']
def baseline(iterations=7):
    """Implementation of the paper.

    Alternates embedding training, Gaussian-mixture fitting and (Viterbi or
    sampling-based) decoding for `iterations` rounds, reporting accuracy on
    each round.
    """
    corpus = Corpus(Q=opt.gmm, subaction=opt.subaction)
    for iteration in range(iterations):
        logger.debug('Iteration %d' % iteration)
        corpus.iter = iteration
        corpus.accuracy_corpus()
        # train the embedding on gt labels only in the first iteration (if
        # gt_training), otherwise retrain every round
        if (opt.gt_training and iteration == 0) or not opt.gt_training:
            corpus.embedding_training()
        # one version of gaussian mixtures for the entire dataset
        if opt.gmms == 'one':
            corpus.one_gaussian_model()
        # different gmm for different subsets of videos, i.e. leave one out for
        # each video subset
        elif opt.gmms == 'many':
            corpus.many_gaussian_models()
            # with multiprocessing package
            # corpus.gaussians_mp(n_threads=3)
        else:
            raise RuntimeError('define number of gmms for the video collection')
        if opt.viterbi:
            # corpus.viterbi_decoding()
            # corpus.accuracy_corpus(prefix='pure vit ')
            # corpus.viterbi_ordering()
            # take into account Mallow Model
            corpus.ordering_sampler()
            corpus.rho_sampling()
            # corpus.accuracy_corpus(prefix='vit+ord ')
            corpus.viterbi_decoding()
            # corpus.viterbi_alex_decoding()
        else:
            corpus.subactivity_sampler()
            # take into account Mallow Model
            corpus.ordering_sampler()
            corpus.rho_sampling()
        # NOTE(review): collapsed source is ambiguous on whether these last two
        # statements are inside the loop; placed inside since the message
        # references `iteration` — confirm against the original file.
        logger.debug('Iteration %d' % iteration)
        corpus.accuracy_corpus()
def all_actions():
    """Run temp_embed for every activity of the chosen dataset and report joint stats.

    Raises:
        ValueError: if opt.dataset is not one of 'bf', 'yti', 'fs' (previously
            this fell through and crashed later with a NameError on `actions`).
    """
    return_stat_all = None
    if opt.dataset == 'bf':
        actions = [
            'coffee', 'cereals', 'tea', 'milk', 'juice', 'sandwich',
            'scrambledegg', 'friedegg', 'salat', 'pancake'
        ]
    elif opt.dataset == 'yti':
        actions = ['changing_tire', 'coffee', 'jump_car', 'cpr', 'repot']
    elif opt.dataset == 'fs':
        actions = ['-1.', '-2.']
    else:
        raise ValueError('unknown dataset: %s' % opt.dataset)
    lr_init = opt.lr
    for action in actions:
        opt.subaction = action
        if not opt.resume:
            # reset the learning rate that the previous action's run may have adjusted
            opt.lr = lr_init
        update_opt_str()
        return_stat_single = temp_embed()
        return_stat_all = join_return_stat(return_stat_all, return_stat_single)
    logger.debug(return_stat_all)
    parse_return_stat(return_stat_all)