def _with_predictions(self):
    self._logger.debug('__init__')
    for video_idx, video in enumerate(self._videos):
        # extract the bare video name from the path, e.g. 'dir/name.ext' -> 'name'
        match = re.match(r'[./\w]*/(\w+)\.\w+', video.path)
        if match is None:
            self._logger.error('Check video paths: the template to extract '
                               'the video name does not match')
        filename = match.group(1)
        self._videoname2idx[filename] = video_idx
        self._idx2videoname[video_idx] = filename
        names = np.asarray([video_idx] * video.n_frames).reshape((-1, 1))
        idxs = np.arange(video.n_frames).reshape((-1, 1))
        if self._regression:
            gt_file = np.asarray(video.pose.frame_labels).reshape((-1, 1))
        elif opt.gt_training:
            gt_file = np.asarray(video._gt).reshape((-1, 1))
        else:
            gt_file = np.asarray(video._z).reshape((-1, 1))
        features = video.features() if self._features is None \
            else self._features[video.global_range]
        temp_feature_list = join_data(None,
                                      (names, idxs, gt_file, features),
                                      np.hstack)
        self._feature_list = join_data(self._feature_list,
                                       temp_feature_list,
                                       np.vstack)
    self._features = None
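# `join_data` is used throughout this section but defined elsewhere. Below is a
# minimal sketch of the interface the calls above rely on, assuming it fuses a
# tuple of arrays with the given function and treats a None accumulator as
# empty; this is a hypothetical reference implementation, not the repo's own.
def join_data_sketch(acc, new, fn):
    """Concatenate `new` (an array or a tuple of arrays) onto `acc` with `fn`
    (np.vstack / np.hstack); a None accumulator is simply replaced."""
    new = fn(new) if isinstance(new, tuple) else new
    return new if acc is None else fn((acc, new))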
def _with_gt(self):
    self._logger.debug('__init__')
    fileindex = 0
    len_file = join(self._root_dir, 'segments', 'lens.txt')
    with open(len_file, 'r') as f:
        for line in f:
            if self._subaction not in line:
                continue
            # each relevant line pairs a file name with its frame count
            match = re.match(r'(\w*)\.\w*\s*(\d*)', line)
            filename = match.group(1)
            filepath = filename
            if opt.data_type == 2:
                filepath = self._subaction + '/' + filename
            self._videoname2idx[filename] = fileindex
            self._idx2videoname[fileindex] = filename
            fileindex += 1
            n_frames = int(match.group(2))
            # the number of gt labels can be inconsistent with the number of
            # frames in the current feature representation, so truncate both
            # to the shorter length (a no-op when they agree)
            min_len = min(len(self.gt_map.gt[filename]), n_frames)
            names = np.asarray([self._videoname2idx[filename]] * min_len) \
                .reshape((-1, 1))
            idxs = np.arange(min_len).reshape((-1, 1))
            gt_file = np.asarray(self.gt_map.gt[filename][:min_len]) \
                .reshape((-1, 1))
            features = np.loadtxt(join(self._root_dir, 'ascii',
                                       filepath + '.%s' % self._end))[:min_len]
            if opt.data_type == 2:
                # drop the first column for this representation
                features = features[:, 1:]
            temp_feature_list = join_data(None,
                                          (names, idxs, gt_file, features),
                                          np.hstack)
            self._feature_list = join_data(self._feature_list,
                                           temp_feature_list,
                                           np.vstack)
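# Illustration only: the regex above expects lens.txt lines that pair a file
# name with a frame count (the sample line below is made up).
import re
sample = 'rgb_vid0042.avi 1842'
m = re.match(r'(\w*)\.\w*\s*(\d*)', sample)
assert (m.group(1), int(m.group(2))) == ('rgb_vid0042', 1842)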
def _for_vae(self):
    # todo: different ways of including the time domain
    self._logger.debug('__init__')
    for video_idx, video in enumerate(self._videos):
        self._videoname2idx[video.name] = video_idx
        self._idx2videoname[video_idx] = video.name
        names = np.asarray([video_idx] * video.n_frames).reshape((-1, 1))
        idxs = np.arange(video.n_frames).reshape((-1, 1))
        gt_file = np.zeros(video.n_frames).reshape((-1, 1))
        if opt.vae_dim == 1:
            relative_time = np.asarray(video.pose.frame_labels).reshape((-1, 1))
            gt_file = relative_time.copy()
        else:
            relative_time = video.pose.relative_segments()
        features = video.features() if self._features is None \
            else self._features[video.global_range]
        if opt.concat > 1:
            # augment each frame with the features of the following
            # (opt.concat - 1) frames, padding the tail with the last frame
            video_feature_concat = features[:]
            last_frame = features[-1]
            for i in range(opt.concat - 1):
                video_feature_concat = np.roll(video_feature_concat, -1, axis=0)
                video_feature_concat[-1] = last_frame
                features = join_data(features, video_feature_concat, np.hstack)
        relative_time *= opt.time_weight
        if not opt.label:
            temp_feature_list = join_data(None,
                                          (names, idxs, gt_file,
                                           features, relative_time),
                                          np.hstack)
        else:
            labels = np.asarray(video._z).reshape((-1, 1))
            temp_feature_list = join_data(None,
                                          (names, idxs, gt_file, features,
                                           relative_time, labels),
                                          np.hstack)
        self._feature_list = join_data(self._feature_list,
                                       temp_feature_list,
                                       np.vstack)
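# Worked example of the `opt.concat` block above with made-up values: for
# concat=3 each frame is hstacked with the features of the next two frames,
# the last frame padding the tail.
import numpy as np
feats = np.arange(4).reshape(-1, 1)      # frames 0..3, one feature each
out, shifted, last = feats.copy(), feats.copy(), feats[-1]
for _ in range(3 - 1):
    shifted = np.roll(shifted, -1, axis=0)
    shifted[-1] = last
    out = np.hstack((out, shifted))
assert (out == [[0, 1, 2], [1, 2, 3], [2, 3, 3], [3, 3, 3]]).all()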
def _tmp_read(self):
    del self.features
    self.features = None
    tmp_path = ops.join(self.config["dataset_root"], self.tmp)
    # temporary feature chunks are stored as '<idx>.npy'; stack them back
    # together in index order and clean up as we go
    tmp_list = [int(i.split('.')[0]) for i in os.listdir(tmp_path)]
    for file_idx in sorted(tmp_list):
        logger.debug(file_idx)
        tmp_file_path = ops.join(tmp_path, '%d.npy' % file_idx)
        tmp_feat = np.load(tmp_file_path)
        self.features = join_data(self.features, tmp_feat, np.vstack)
        os.remove(tmp_file_path)
def labels(self, new_labels):
    self._labels = join_data(self._labels, new_labels, np.hstack)
    self._sizes += [self.size] * len(new_labels)
def data(self, new_data):
    self._data = join_data(self._data, new_data, np.vstack)
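# `AverageMeter` and `load_model`, used by `accuracy` below, are defined
# elsewhere. Here is a minimal AverageMeter sketch covering just the interface
# the code relies on (.update(val, n), .val, .avg), in the style of the common
# PyTorch examples helper; the repo's own version may differ.
class AverageMeterSketch:
    """Track the most recent value and a count-weighted running average."""
    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count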
def accuracy(train_loader, model, epoch, best_acc, resume=False, idx2name=None):
    """Calculate accuracy of the trained embedding, either for the model that
    was just trained or for a pretrained model loaded from disk."""
    if resume:
        logger.debug('Load the model for epoch %d' % epoch)
        model.load_state_dict(load_model(epoch))
    else:
        model.cpu()
    model.eval()
    acc = AverageMeter()
    logger.debug('Evaluation')
    with torch.no_grad():
        anchors = model.anchors().detach().numpy()
        video_save_feat = None
        name_cur = None
        for i, (input, k, name) in enumerate(train_loader):
            input = input.float()
            k = np.argmax(k.numpy(), axis=1)
            output = model.embedded(input).cpu().numpy()
            if opt.save:
                name = name.numpy()
                name_cur = name[0] if name_cur is None else name_cur
                for idx, f in enumerate(output):
                    if name_cur == int(name[idx]):
                        video_save_feat = join_data(video_save_feat, f, np.vstack)
                    else:
                        # a new video starts: flush the accumulated features
                        # of the previous one
                        np.savetxt(
                            join(opt.data, 'embed',
                                 '%d_%d_%s_' % (opt.embed_dim, opt.data_type,
                                                str(opt.lr)) + idx2name[name_cur]),
                            video_save_feat)
                        video_save_feat = join_data(None, f, np.vstack)
                        name_cur = int(name[idx])
            # squared Euclidean distances to all anchors via the expansion
            # ||x - a||^2 = ||x||^2 - 2 x.a + ||a||^2
            dists = -2 * np.dot(output, anchors.T) \
                    + np.sum(anchors ** 2, axis=1) \
                    + np.sum(output ** 2, axis=1)[:, np.newaxis]
            # fraction of frames whose nearest anchor matches the target
            batch_acc = np.sum(np.argmin(dists, axis=1) == k,
                               dtype=float) / input.size(0)
            acc.update(batch_acc, input.size(0))
            if i % 100 == 0 and i:
                logger.debug('Iter: [{0}/{1}]\t'
                             'Accuracy {acc.val:.4f} ({acc.avg:.4f})\t'
                             .format(i, len(train_loader), acc=acc))
    if opt.save_feat:
        # flush the features of the last video and save the anchors themselves
        np.savetxt(
            join(opt.data, 'embed',
                 '%d_%d_%s_' % (opt.embed_dim, opt.data_type,
                                str(opt.lr)) + idx2name[name_cur]),
            video_save_feat)
        np.savetxt(
            join(opt.data, 'embed',
                 'anchors_%s_%d_%d_%s' % (opt.subaction, opt.embed_dim,
                                          opt.data_type, str(opt.lr))),
            anchors)
    if best_acc < acc.avg:
        best_acc = acc.avg
    logger.debug('Accuracy {acc.val:.4f} ({acc.avg:.4f})\t(best: {0:.4f})'
                 .format(best_acc, acc=acc))
    return best_acc
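# Standalone sanity check (random, made-up shapes) that the vectorized
# distance in `accuracy` equals the pairwise squared Euclidean distance,
# i.e. ||x - a||^2 = ||x||^2 - 2 x.a + ||a||^2.
import numpy as np
rng = np.random.default_rng(0)
output, anchors = rng.normal(size=(5, 8)), rng.normal(size=(3, 8))
dists = -2 * np.dot(output, anchors.T) + np.sum(anchors ** 2, axis=1) \
        + np.sum(output ** 2, axis=1)[:, np.newaxis]
naive = ((output[:, None, :] - anchors[None, :, :]) ** 2).sum(axis=2)
assert np.allclose(dists, naive)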