def _tmp_save(self, idx):
    dir_check(ops.join(self.config["dataset_root"], self.tmp))
    np.save(
        ops.join(self.config["dataset_root"], self.tmp, '%d.npy' % idx),
        self.features)
    del self.features
    self.features = None
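# `dir_check` is used throughout these files but not defined in the excerpts;
# a minimal sketch, assuming it simply creates the directory if it does not
# exist yet (the implementation below is an assumption, not the project's):

import os

def dir_check(path):
    """Create the directory `path` if it does not already exist."""
    os.makedirs(path, exist_ok=True)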
def __init__(self, config, model, dataset, folder_probs):
    self.config = config
    self.folder_probs = folder_probs
    ctx = mp.get_context('spawn')
    self._queue = ctx.Queue()
    torch.manual_seed(self.config["seed"])
    self.model = model
    self.model.cpu()
    self.model.eval()
    self.dataset = dataset
    dir_check(ops.join(self.config["dataset_root"], config["out_probs"]))
    # enqueue one work item per video so that worker processes can pull indices
    for i in range(len(self.dataset)):
        self._queue.put(i)
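# The constructor above seeds a 'spawn'-context queue with one index per video.
# A minimal sketch of the consuming side, assuming two additional methods on
# the same class: workers drain the queue and call `save_probs` per index
# (`_worker`, `run` and `n_workers` are hypothetical names, not from the source):

import queue

def _worker(self):
    # pull indices until the queue is empty, then exit
    while True:
        try:
            n_video = self._queue.get(timeout=1)
        except queue.Empty:
            break
        self.save_probs(n_video)

def run(self, n_workers=4):
    ctx = mp.get_context('spawn')
    procs = [ctx.Process(target=self._worker) for _ in range(n_workers)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()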
def save_probs(config, dataloader, model):
    dir_check(ops.join(config["dataset_root"], config["out_probs"]))
    logger.debug('forward data and save probabilities')
    torch.manual_seed(config["seed"])
    model.eval()
    model.cpu()  # run inference on CPU (may be redundant if already there)
    with torch.no_grad():
        for idx, (features, name) in enumerate(dataloader):
            output = model(features).numpy()
            np.save(
                ops.join(config["dataset_root"], config["out_probs"], name),
                output)
def test(config, video_name, label2idx, idx2label, folder_probs, folder_seg):
    print(folder_probs, video_name)
    probs = np.load(ops.join(folder_probs, video_name + '.npy'))
    # replace zero probabilities with the smallest nonzero value to avoid log(0)
    if np.min(probs) == 0:
        probs[probs == 0] = np.inf
        probs[probs == np.inf] = np.min(probs)
    log_probs = np.log(probs)

    mean_lengths = np.squeeze(np.ones((len(idx2label), 1)) * 150)
    length_model = FlatModel(mean_lengths, max_length=2000)
    file_grammar_path = ops.join(config["transcripts"], video_name + '.txt')
    grammar = PathGrammar(file_grammar_path, label2idx)
    viterbi_decoder = Viterbi(grammar, length_model,
                              frame_sampling=20, max_hypotheses=50000)
    score, labels, segments = viterbi_decoder.decode(log_probs)

    # write result to file
    dir_check(folder_seg)
    out_file = ops.join(folder_seg, video_name + '.txt')
    with open(out_file, 'w') as f:
        for label in labels:
            f.write('%s\n' % idx2label[label])

    # read ground truth
    video_gt = []
    with open(ops.join(config["gt"], video_name + '.txt'), 'r') as f:
        for line in f:
            line = line.strip()
            idx = label2idx[line]
            video_gt.append(idx)

    # bring labels and ground truth to the same length
    if len(labels) < len(video_gt):
        # pad with the last label
        labels_new = np.squeeze(np.zeros((len(video_gt), 1)))
        labels_new[:len(labels)] = labels
        labels_new[len(labels):len(video_gt)] = labels[-1]
        labels = labels_new
    if len(labels) > len(video_gt):
        labels = labels[:len(video_gt)]
    return labels, video_gt
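# The zero-handling above is a small numeric trick: setting zeros to inf first
# makes np.min skip them, so the former zeros end up replaced by the smallest
# *nonzero* probability and log() never sees a zero. A self-contained check:

import numpy as np

probs = np.array([0.0, 0.2, 0.05, 0.75])
if np.min(probs) == 0:
    probs[probs == 0] = np.inf               # mask zeros out of the min()
    probs[probs == np.inf] = np.min(probs)   # smallest nonzero value: 0.05
assert np.all(np.isfinite(np.log(probs)))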
def plot(self, iter=0, show=True, gt_plot=0, prefix=''):
    if iter is not None:
        self._counter = iter
    plt.scatter(self._result[..., 0], self._result[..., 1],
                c=self._labels, s=self._sizes, alpha=0.5)
    plt.grid(True)
    if self._save:
        dir_check(join(opt.dataset_root, 'plots'))
        dir_check(join(opt.dataset_root, 'plots', opt.subaction))
        pose_segm = ['!pose_', ''][opt.pose_segm]
        name = ['iter%d' % self._counter, 'gt', 'time'][gt_plot]
        name += '_%s.png' % self._mode
        name = prefix + '%s_%s_' % (opt.subaction, opt.tr_type) + name
        weight = ['w%d_' % int(opt.time_weight), ''][opt.time_weight == 1]
        folder_name = '%s_%slr_%.1e_dim_%d_ep_%d' % \
            (opt.prefix, pose_segm, opt.lr, opt.embed_dim, opt.epochs)
        folder_name = opt.prefix + weight + folder_name
        dir_check(join(opt.dataset_root, 'plots', opt.subaction, folder_name))
        plt.savefig(join(opt.dataset_root, 'plots', opt.subaction,
                         folder_name, name), dpi=400)
    if show:
        plt.show()
def save_probs(self, n_video):
    features, name = self.dataset[n_video]
    with torch.no_grad():
        output = self.model(features).numpy()
    # softmax over the linear output of the network
    output = self.softmax(output)
    if self.config["get_cond_probs"] > 0:
        # avoid division by zero: replace zeros with the smallest
        # nonzero probability
        if np.min(output) == 0:
            output[output == 0] = np.inf
            output[output == np.inf] = np.min(output)
        # go to log space
        log_probs = np.log(output)
        prior = np.load(
            ops.join(self.config["model_folder"],
                     '%s%d.probs.npy' % (self.config["log_str"],
                                         self.config["test_epoch"])))
        prior = np.squeeze(prior)
        log_prior = np.log(prior)
        # classes with zero prior contribute no prior term
        log_prior[prior == 0] = 0
        log_probs = log_probs - log_prior
        # set background separately
        log_probs[:, -1] = np.mean(log_probs[:, :-1])
        if np.max(log_probs) > 0:
            log_probs -= 2 * np.max(log_probs)
        output = np.exp(log_probs)
    dir_check(self.folder_probs)
    np.save(ops.join(self.folder_probs, name), output)
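# Subtracting the log prior above converts framewise posteriors p(c|x) into
# (unnormalized) conditional likelihoods p(x|c) ∝ p(c|x) / p(c), which is the
# quantity the Viterbi decoder expects. A minimal sketch with made-up numbers:

import numpy as np

posteriors = np.array([[0.7, 0.2, 0.1],
                       [0.1, 0.6, 0.3]])   # p(c|x), one row per frame
prior = np.array([0.5, 0.3, 0.2])          # p(c), class frequencies

log_probs = np.log(posteriors) - np.log(prior)  # log p(c|x) - log p(c)
likelihoods = np.exp(log_probs)                 # proportional to p(x|c)
print(likelihoods)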
""" From one hot encoding labeling to my format of gt """ __author__ = 'Anna Kukleva' __date__ = 'September 2018' import os import re import numpy as np from utils.arg_pars import logger, opt from utils.utils import dir_check actions = ['coffee', 'changing_tire', 'cpr', 'jump_car', 'repot'] gt_folder = '/media/data/kukleva/lab/YTInstructions/VISION_txt_annot' dir_check(opt.gt) label2idx = {} idx2label = {} videos = {} for root, dirs, files in os.walk(gt_folder): for filename in files: segmentation = [] with open(os.path.join(root, filename), 'r') as f: for line in f: line = line.split() line = list(map(lambda x: int(x), line)) label = -1 if line[-1] == 1 else np.where(line)[0][0] if label != -1:
def save_likelihood(self):
    """Used for multiprocessing."""
    dir_check(os.path.join(opt.data, 'likelihood'))
    np.savetxt(os.path.join(opt.data, 'likelihood', self.name),
               self._likelihood_grid)
if __name__ == '__main__':
    with open('../config/train_config_relu_org_files_2048dim.json') as config_file:
        config = json.load(config_file)

    # create dir if needed
    dir_check(ops.join(config["out_segmentation"]))

    # sample code for computing segmentation and accuracy for every second
    # epoch in the range 0-50
    all_res = []
    epochs_processed = []
    for i in range(0, 50, 2):
        try:
            config["test_epoch"] = i
            folder_probs = ops.join(config["out_probs"], str(config["test_epoch"]))
            folder_seg = ops.join(config["out_segmentation"], str(config["test_epoch"]))
            logger_setup(config)
def training(config, data, **kwargs):
    """Training pipeline for embedding.

    Args:
        config: configuration dictionary
        data: dataset (if config["sparse"]) or dataloader over it
        kwargs: 'model', 'loss' and 'optimizer' instances
    Returns:
        trained pytorch model
    """
    logger.debug('create model')
    torch.manual_seed(config["seed"])

    model = kwargs['model']
    loss = kwargs['loss']
    optimizer = kwargs['optimizer']

    create_dataloader = lambda x: torch.utils.data.DataLoader(
        x, batch_size=config["batch_size"], shuffle=True,
        num_workers=config["num_workers"])
    if config["sparse"]:
        # keep the dataset so it can be resampled at the end of each epoch
        dataset = data
        data = create_dataloader(dataset)

    cudnn.benchmark = True

    batch_time = Averaging()
    data_time = Averaging()
    losses = Averaging()

    adjustable_lr = config["lr"]

    logger.debug('epochs: %s', config["epochs"])
    for epoch in range(config["epochs"]):
        model.cuda()
        model.train()

        logger.debug('Epoch # %d' % epoch)
        if config["lr_adj"]:
            if epoch % 50 == 0 and epoch > 0:
                adjustable_lr = adjust_lr(optimizer, adjustable_lr)
                logger.debug('lr: %f' % adjustable_lr)

        end_time = time.time()
        train_acc_epoch = torch.zeros((1, 1))
        time_epoch = time.time()
        for i, (features, labels) in enumerate(data):
            data_time.update(time.time() - end_time)
            features = features.float().cuda(non_blocking=True)
            labels = labels.long().cuda()

            output = model(features)
            # count correct top-1 predictions for the epoch training error
            max_index = output.max(dim=1)[1]
            train_acc = (max_index == labels).sum()
            train_acc_epoch = train_acc_epoch + train_acc

            loss_values = loss(output, labels)
            losses.update(loss_values.item(), features.size(0))

            optimizer.zero_grad()
            loss_values.backward()
            optimizer.step()

            batch_time.update(time.time() - end_time)
            end_time = time.time()

        logger.debug('duration: %f' % (time.time() - time_epoch))
        logger.debug('train_err: %f' %
                     (1 - (train_acc_epoch.cpu().numpy() /
                           (len(data) * config["batch_size"]))))
        logger.debug('loss: %f' % losses.avg)
        losses.reset()

        if config["save_model"]:
            save_dict = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            dir_check(config["model_folder"])
            logger.debug('Saving model to: %s' %
                         ops.join(config["model_folder"],
                                  '%s%d.pth.tar' % (config["log_str"], epoch)))
            torch.save(
                save_dict,
                ops.join(config["model_folder"],
                         '%s%d.pth.tar' % (config["log_str"], epoch)))
            logger.debug('Saving probs to: %s' %
                         ops.join(config["model_folder"],
                                  '%s%d.probs' % (config["log_str"], epoch)))
            data.dataset.save_probs(
                ops.join(config["model_folder"],
                         '%s%d.probs' % (config["log_str"], epoch)))

        if config["sparse"]:
            dataset.next_epoch()
            data = create_dataloader(dataset)

    if config["save_model"]:
        save_dict = {
            'epoch': config["epochs"],
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        dir_check(config["model_folder"])
        logger.debug('Saving model to: %s' %
                     ops.join(config["model_folder"],
                              '%s%d.pth.tar' % (config["log_str"], config["epochs"])))
        torch.save(
            save_dict,
            ops.join(config["model_folder"],
                     '%s%d.pth.tar' % (config["log_str"], config["epochs"])))
        logger.debug('Saving probs to: %s' %
                     ops.join(config["model_folder"],
                              '%s%d.probs' % (config["log_str"], config["epochs"])))
        data.dataset.save_probs(
            ops.join(config["model_folder"],
                     '%s%d.probs' % (config["log_str"], config["epochs"])))

    return model
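# `Averaging` (and `AverageMeter` below) are not defined in these excerpts;
# a minimal sketch matching the `update(val, n)` / `reset()` / `.val` / `.avg`
# usage seen in the training loops above:

class Averaging(object):
    """Keeps the last value and a running average (assumed interface)."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count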
    dataset = TestDataset(config)
    dataloader = iter(dataset)
    save_probs(config, dataloader, model)


if __name__ == '__main__':
    # load config file
    with open('../config/train_config_relu_org_files_2048dim.json') as config_file:
        config = json.load(config_file)
    print(config["out_probs"])
    logger_setup(config)

    # create dir if needed
    dir_check(ops.join(config["out_probs"]))

    # sample code for computing output probabilities for every second epoch
    # in the range 0-50
    len_last_str = 0
    epochs_processed = []
    for i in range(0, 50, 2):
        config["test_epoch"] = i
        folder_probs = ops.join(config["out_probs"], str(config["test_epoch"]))
        len_last_str = len(str(i))
        logger_setup(config)
        try:
            save_mp(config, folder_probs)
            epochs_processed.append(i)
        except:
def training(train_loader, epochs, n_subact=0, save=True, **kwargs):
    """Training pipeline for embedding.

    Args:
        train_loader: iterator over the dataset
        epochs: how many training epochs to perform
        n_subact: number of subactions in the current complex activity
        save: whether to checkpoint the model after each epoch
    Returns:
        trained pytorch model
    """
    logger.debug('create model')
    torch.manual_seed(opt.seed)
    np.random.seed(opt.seed)

    try:
        model = kwargs['model']
        loss = kwargs['loss']
        optimizer = kwargs['optimizer']
    except KeyError:
        # fall back to the default embedding setup
        model = Embedding(embed_dim=opt.embed_dim,
                          feature_dim=opt.feature_dim,
                          n_subact=n_subact).cuda()
        loss = RankLoss(margin=0.2).cuda()
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=opt.lr,
                                    momentum=opt.momentum,
                                    weight_decay=opt.weight_decay)
    cudnn.benchmark = True

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    vis = Visual()
    best_acc = -1
    _lr = opt.lr

    logger.debug('epochs: %s', epochs)
    loss_previous = np.inf
    for epoch in range(epochs):
        model.cuda()
        model.train()

        logger.debug('Epoch # %d' % epoch)
        if opt.lr_adj:
            if epoch % 30 == 0 and epoch > 0:
                _lr = adjust_lr(optimizer, _lr)
                logger.debug('lr: %f' % _lr)

        end = time.time()
        for i, (input, k, _) in enumerate(train_loader):
            # TODO: not sure that it's necessary
            data_time.update(time.time() - end)
            input = input.float().cuda(non_blocking=True)
            k = k.float().cuda()

            output = model(input)
            loss_values = loss(output, k)
            losses.update(loss_values.item(), input.size(0))

            optimizer.zero_grad()
            loss_values.backward()
            optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()

            if i % 100 == 0 and i:
                logger.debug(
                    'Epoch: [{0}][{1}/{2}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                        epoch, i, len(train_loader),
                        batch_time=batch_time,
                        data_time=data_time, loss=losses))
        logger.debug('loss: %f' % losses.avg)
        losses.reset()

        if save:
            save_dict = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            dir_check(join(opt.dataset_root, 'models'))
            dir_check(join(opt.dataset_root, 'models', kwargs['name']))
            torch.save(save_dict,
                       join(opt.dataset_root, 'models', kwargs['name'],
                            '%s.pth.tar' % opt.log_str))
    return model
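# `adjust_lr` is called in all of the training loops but not shown; a minimal
# sketch, assuming it decays the learning rate and writes it back into the
# optimizer's parameter groups (the factor 0.5 is an assumption, not taken
# from the source):

def adjust_lr(optimizer, lr):
    lr = lr * 0.5  # assumed decay factor
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr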
def training(dataloader, **kwargs):
    """Training pipeline for embedding.

    Args:
        dataloader: iterator over the dataset
        kwargs: 'model', 'loss' and 'optimizer' instances
    Returns:
        trained pytorch model
    """
    logger.debug('create model')
    torch.manual_seed(opt.seed)

    model = kwargs['model']
    loss = kwargs['loss']
    optimizer = kwargs['optimizer']

    cudnn.benchmark = True

    batch_time = Averaging()
    data_time = Averaging()
    losses = Averaging()

    adjustable_lr = opt.lr

    logger.debug('epochs: %s', opt.epochs)
    for epoch in range(opt.epochs):
        model.train()

        logger.debug('Epoch # %d' % epoch)
        if opt.lr_adj:
            if epoch % 5 == 0 and epoch > 0:
                adjustable_lr = adjust_lr(optimizer, adjustable_lr)
                logger.debug('lr: %f' % adjustable_lr)

        end = time.time()
        for i, (features, labels) in enumerate(dataloader):
            data_time.update(time.time() - end)
            features = features.cuda(non_blocking=True)
            labels = labels.long().cuda()

            output = model(features)
            loss_values = loss(output, labels)
            losses.update(loss_values.item(), labels.size(0))

            optimizer.zero_grad()
            loss_values.backward()
            optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()

            if i % 100 == 0 and i:
                logger.debug(
                    'Epoch: [{0}][{1}/{2}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                        epoch, i, len(dataloader),
                        batch_time=batch_time,
                        data_time=data_time, loss=losses))
        logger.debug('loss: %f' % losses.avg)
        losses.reset()

        # checkpoint after every epoch
        save_dict = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        dir_check(ops.join(opt.dataset_root, 'models'))
        torch.save(save_dict,
                   ops.join(opt.dataset_root, 'models',
                            '%s%d.pth.tar' % (opt.log_str, epoch)))

    if opt.save_model:
        save_dict = {
            'epoch': opt.epochs,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        dir_check(ops.join(opt.dataset_root, 'models'))
        torch.save(save_dict,
                   ops.join(opt.dataset_root, 'models',
                            '%s%d.pth.tar' % (opt.log_str, opt.epochs)))
    return model
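# A hypothetical call site for the training pipeline above; the model, loss,
# optimizer and `dataloader` here are placeholder assumptions, not the ones
# used in the project:

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(64, 32), nn.ReLU(), nn.Linear(32, 10)).cuda()
loss = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)

# `dataloader` is assumed to yield (features, labels) batches
model = training(dataloader, model=model, loss=loss, optimizer=optimizer)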