def genInterpolations():
    """Generate latent-space interpolations between random pairs of 2D poses.

    Loads the trained pose VAE from ``args.loadModel``, samples random pairs
    from the H36M validation set, linearly interpolates between their latent
    codes, and saves the decoded skeletons as one image grid per pair.
    """
    model = torch.load(args.loadModel)
    # Fix: switch to inference mode so any dropout/batch-norm layers behave
    # deterministically; the combined-VAE interpolation generator does this.
    model.eval()
    num_samples = 100
    interpol_length = 11
    dataset = H36M(opt, 'val')
    for i in range(num_samples):
        ind1, ind2 = np.random.randint(0, len(dataset), 2)
        _, poseMap1 = dataset.__getitem__(ind1)
        _, poseMap2 = dataset.__getitem__(ind2)
        # Add a leading batch dimension of 1.
        poseMap1 = poseMap1[None, ...]
        poseMap2 = poseMap2[None, ...]
        # Decode heatmaps into per-joint (x, y) coordinates.
        pts1 = torch.FloatTensor(getPreds(poseMap1))
        pts2 = torch.FloatTensor(getPreds(poseMap2))
        data1, data2 = Variable(pts1), Variable(pts2)
        if args.cuda:
            data1 = data1.cuda()
            data2 = data2.cuda()
        # 32 = 16 joints * 2 coordinates flattened for the encoder.
        mu1, logvar1 = model.encode(data1.view(-1, 32))
        z1 = model.reparameterize(mu1, logvar1)
        mu2, logvar2 = model.encode(data2.view(-1, 32))
        z2 = model.reparameterize(mu2, logvar2)
        result = model.decode(z1)
        for j in range(1, interpol_length + 1):
            # Linear walk in latent space from z1 towards z2.
            inter_z = z1 + j * (z2 - z1) / interpol_length
            interpol_pose = model.decode(inter_z)
            result = torch.cat((result, interpol_pose))
        # print(result.shape)
        save_image(makeSkel(result.data * 4, (255, 0, 0)),
                   '../exp/vae_pose/genInterpolations/interpol_' + str(i) + '.png',
                   nrow=(interpol_length + 1) // 3)
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    """Run one epoch over ``dataLoader`` for the depth-regression model.

    In 'train' mode backpropagates an nStack-way depth loss; otherwise only
    evaluates.  Returns ``(Loss.avg, Acc.avg, Mpjpe.avg, Loss3D.avg)``.
    NOTE(review): Acc and Loss3D are never updated in this variant, so their
    averages are always 0 — confirm callers ignore them.
    """
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc, Mpjpe, Loss3D = AverageMeter(), AverageMeter(), AverageMeter(
    ), AverageMeter()
    nIters = len(dataLoader)
    bar = Bar('==>', max=nIters)
    for i, (input, target2D, target3D, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda()
        target2D_var = torch.autograd.Variable(target2D).float().cuda()
        target3D_var = torch.autograd.Variable(target3D).float().cuda()
        # Model consumes the image plus ground-truth 2D heatmaps and returns
        # per-stack heatmaps (depMap) and per-stack depth predictions (depth).
        depMap, depth = model(input_var, target2D_var)
        depthPridict = depth[opt.nStack - 1]  # last-stack depth prediction
        if opt.DEBUG >= 2:
            # Overlay predicted (red) vs ground-truth (blue) 2D joints;
            # *4 rescales 64x64 heatmap coordinates to the 256x256 image.
            gt = getPreds(target2D.cpu().numpy()) * 4
            pred = getPreds((depMap[opt.nStack - 1].data).cpu().numpy()) * 4
            debugger = Debugger()
            debugger.addImg(
                (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showImg()
            debugger.saveImg('debug/{}.png'.format(i))
        loss = 0
        for k in range(opt.nStack):
            # Supervise every stack against the z (depth) channel of the
            # 3D target.
            loss += criterion(depth[k], target3D_var[:, :, 2])
        Loss.update(loss.data[0], input.size(0))
        #Acc.update(Accuracy((output[opt.nStack - 1].data).cpu().numpy(), (target2D_var.data).cpu().numpy()))
        mpjpe, num3D = MPJPE2(target3D.cpu().numpy(),
                              (depthPridict.data).cpu().numpy(), meta)
        Mpjpe.update(mpjpe, num3D)
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Mpjpe {Mpjpe.avg:.6f} ({Mpjpe.val:.6f})'.format(
            epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss,
            split=split, Mpjpe=Mpjpe)
        bar.next()
    bar.finish()
    return Loss.avg, Acc.avg, Mpjpe.avg, Loss3D.avg
def _extract_split_poses(model, frames_root):
    """Run the 3D pose model over every action directory under ``frames_root``.

    For each frame, takes 2D joints from the model's penultimate output and
    per-joint depth from its last output, forming (n_frames, 16, 3) arrays.
    Returns a dict mapping action-directory name -> frames array.
    """
    split_dict = {}
    for a_dir in os.listdir(frames_root):
        all_frames = os.listdir('{}/{}'.format(frames_root, a_dir))
        frames_seq = np.zeros((len(all_frames), 16, 3))
        for idx, frame in enumerate(all_frames):
            img = cv2.imread('{}/{}/{}'.format(frames_root, a_dir, frame))
            input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256.
            input = input.view(1, input.size(0), input.size(1), input.size(2))
            input_var = torch.autograd.Variable(input).float().cuda()
            output = model(input_var)
            # 2D joints (scaled 64->256 px) and depth regression channel.
            pred = getPreds((output[-2].data).cpu().numpy())[0] * 4
            reg = (output[-1].data).cpu().numpy().reshape(pred.shape[0], 1)
            #point_3d are the 3 dimensional co-ordinates of the 16 joints
            point_3d = np.concatenate([pred, (reg + 1) / 2. * 256], axis=1)
            frames_seq[idx, :, :] = point_3d
        split_dict[a_dir] = frames_seq
        print('{} done!'.format(a_dir))
    return split_dict


def main():
    """Extract per-frame 3D poses for train/val frame dirs and pickle them.

    Refactor: the train and val loops were verbatim duplicates; both now use
    ``_extract_split_poses``.
    """
    # Checkpoints/pickles were written by Python 2; force latin1 decoding.
    pickle.load = partial(pickle.load, encoding="latin1")
    pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
    opt = opts().parse()
    if opt.loadModel != 'none':
        model = torch.load(opt.loadModel).cuda()
    else:
        model = torch.load('../../tr_models/hgreg-3d.pth').cuda()
    my_tr_dict = _extract_split_poses(model, opt.demo + '/train_frames')
    with open(opt.demo + '/mini_train_data.pkl', 'wb') as handle:
        pickle.dump(my_tr_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    my_vl_dict = _extract_split_poses(model, opt.demo + '/val_frames')
    with open(opt.demo + '/mini_val_data.pkl', 'wb') as handle:
        pickle.dump(my_vl_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
def check_logic():
    """Sanity-check a manual forward pass through a decomposed hourglass.

    Re-runs preprocessing, one hourglass encoder/decoder, residuals and the
    first lin/tmpOut heads module-by-module, then visually compares the
    resulting 2D predictions against ground truth.  Relies on module-level
    globals: ``model``, ``opts``, ``nSamples``, ``nModules``.
    """
    # Stem layers that run before the first hourglass.
    preproc = nn.ModuleList([model.conv1_, model.bn1, model.relu, model.r1,
                             model.maxpool, model.r4, model.r5])
    hg = model.hourglass[0]
    lower_hg = getEncoder(hg)
    data_loader = torch.utils.data.DataLoader(
        H36M(opts, 'val'),
        batch_size=1,
        shuffle=False,
        num_workers=int(ref.nThreads))
    for k, (input, target) in enumerate(data_loader):
        if (k > nSamples):
            break
        input_var = torch.autograd.Variable(input).float().cuda()
        # Encode: stem then the hourglass downsampling half.
        for mod in preproc:
            input_var = mod(input_var)
        for mod in lower_hg:
            input_var = mod(input_var)
        #decode
        ups = input_var
        upper_hg = nn.ModuleList(getDecoder(hg))
        for mod in upper_hg:
            ups = mod(ups)
        # Post-hourglass residual blocks and the first prediction head.
        Residual = model.Residual
        for j in range(nModules):
            ups = Residual[j](ups)
        lin_ = model.lin_
        ups = lin_[0](ups)
        tmpOut = model.tmpOut
        ups = tmpOut[0](ups)
        # *4 rescales 64x64 heatmap coordinates to the 256x256 image.
        pred = eval.getPreds(ups.data.cpu().numpy()) * 4
        gt = eval.getPreds(target.cpu().numpy()) * 4
        # init = getPreds(input.numpy()[:, 3:])
        debugger = Debugger()
        img = (input[0].numpy()[:3].transpose(1, 2, 0) * 256).astype(
            np.uint8).copy()
        print(img.shape)
        debugger.addImg(img)
        debugger.addPoint2D(pred[0], (255, 0, 0))
        debugger.addPoint2D(gt[0], (0, 0, 255))
        # debugger.addPoint2D(init[0], (0, 255, 0))
        debugger.showAllImg(pause=True)
def test(epoch):
    """Evaluate the 2D-pose VAE on the test loader and print the mean loss."""
    model.eval()
    total = 0
    for _, pose_maps in test_loader:
        # Heatmaps -> (N, 16, 2) joint coordinates.
        joints = torch.FloatTensor(getPreds(pose_maps.cpu().numpy()))
        batch = Variable(joints, volatile=True)
        if args.cuda:
            batch = batch.cuda()
        recon, mu, logvar = model(batch)
        total = total + loss_function(recon, batch, mu, logvar).data[0]
    test_loss = total / len(test_loader.dataset)
    print('====> Test set loss: {:.4f}'.format(test_loss))
def genInterpolations():
    """Interpolate between random pose pairs in the shared VAE latent space.

    For each sampled pair, encodes both poses, walks linearly between the two
    pose latents, and decodes both an image (``result_img``) and a skeleton
    rendering (``result_pose``) at every step.
    """
    model.eval()
    num_samples = 100
    interpol_length = 11
    dataset = H36M_Comb_VAE(opt, 'val')
    for i in range(num_samples):
        print("Generating video for sample ", i)
        ind1, ind2 = np.random.randint(0, len(dataset), 2)
        inpImg1, _, poseMap1 = dataset.__getitem__(ind1)
        inpImg2, _, poseMap2 = dataset.__getitem__(ind2)
        # Add a leading batch dimension of 1 to everything.
        poseMap1 = poseMap1[None, ...]
        poseMap2 = poseMap2[None, ...]
        inpImg1 = inpImg1[None, ...]
        inpImg2 = inpImg2[None, ...]
        input_img1 = Variable(inpImg1).float().cuda()
        input_pose1 = Variable(torch.FloatTensor(getPreds(poseMap1.cpu().numpy())).view(-1, 32)).float().cuda()
        input_img2 = Variable(inpImg2).float().cuda()
        input_pose2 = Variable(torch.FloatTensor(getPreds(poseMap2.cpu().numpy())).view(-1, 32)).float().cuda()
        # Only the pose-branch latent (4th model output) drives interpolation.
        _, _, _, h_pose1 = model(input_img1, input_pose1)
        _, _, _, h_pose2 = model(input_img2, input_pose2)
        result_img = model.forward_p_to_i(h_pose1)
        result_pose = makeSkel_LR_64(model.decode_pose(h_pose1))
        for j in range(1, interpol_length + 1):
            inter_h = h_pose1 + j * (h_pose2 - h_pose1) / interpol_length
            interpol_image, interpol_pose = model.forward_p_to_i(inter_h), makeSkel_LR_64(model.decode_pose(inter_h))
            result_img = torch.cat((result_img, interpol_image))
            # Fix: append to the pose sequence; the original concatenated
            # result_img here, clobbering the accumulated pose renderings.
            result_pose = torch.cat((result_pose, interpol_pose))
def test(epoch):
    """Evaluate the 3D-pose VAE and print the mean test-set loss."""
    model.eval()
    test_loss = 0
    # Fix: evaluate on the *test* loader — the original iterated train_loader
    # while normalising by len(test_loader.dataset), producing a meaningless
    # (and train-set-based) "test" metric.
    for batch_idx, (_, poseMap, reg) in enumerate(test_loader):
        # 2D joints scaled to image pixels + rescaled depth -> (N, 16, 3).
        pred = getPreds(poseMap.cpu().numpy()) * 4
        reg = reg[:, :, 2].cpu().numpy().reshape(-1, ref.nJoints, 1)
        pts = torch.FloatTensor(np.concatenate([pred, (reg + 1) / 2. * 256], axis=2))
        data = Variable(pts, volatile=True)
        if args.cuda:
            data = data.cuda()
        recon_batch, mu, logvar = model(data)
        test_loss += loss_function(recon_batch, data, mu, logvar).data[0]
    test_loss /= len(test_loader.dataset)
    print('====> Test set loss: {:.4f}'.format(test_loss))
def genInterpolations():
    """Interpolate between random 3D-pose pairs in the VAE latent space.

    Samples pairs from the H36M 3D validation set, interpolates their latent
    codes and saves the decoded 3D skeletons as one image grid per pair.
    """
    model = torch.load(args.loadModel)
    # Fix: run in inference mode, matching the combined-VAE generator; the
    # original left the freshly loaded model in training mode.
    model.eval()
    num_samples = 100
    interpol_length = 11
    dataset = H36M_3D(opt, 'val')
    for i in range(num_samples):
        ind1, ind2 = np.random.randint(0, len(dataset), 2)
        _, poseMap1, reg1 = dataset.__getitem__(ind1)
        _, poseMap2, reg2 = dataset.__getitem__(ind2)
        # Add a leading batch dimension of 1.
        poseMap1 = poseMap1[None, ...]
        poseMap2 = poseMap2[None, ...]
        # Keep only the depth (z) channel, shaped (1, nJoints, 1).
        reg1 = reg1[None, :, 2, None]
        reg2 = reg2[None, :, 2, None]
        pred1 = getPreds(poseMap1)
        pred2 = getPreds(poseMap2)
        # (x, y) from heatmaps + rescaled depth -> (1, 16, 3) joint points.
        pts1 = torch.FloatTensor(np.concatenate([pred1, (reg1 + 1) / 8. * 256], axis=2))
        pts2 = torch.FloatTensor(np.concatenate([pred2, (reg2 + 1) / 8. * 256], axis=2))
        data1, data2 = Variable(pts1), Variable(pts2)
        if args.cuda:
            data1 = data1.cuda()
            data2 = data2.cuda()
        # 48 = 16 joints * 3 coordinates flattened for the encoder.
        mu1, logvar1 = model.encode(data1.view(-1, 48))
        z1 = model.reparameterize(mu1, logvar1)
        mu2, logvar2 = model.encode(data2.view(-1, 48))
        z2 = model.reparameterize(mu2, logvar2)
        result = model.decode(z1)
        for j in range(1, interpol_length + 1):
            inter_z = z1 + j * (z2 - z1) / interpol_length
            interpol_pose = model.decode(inter_z)
            result = torch.cat((result, interpol_pose))
        # print(result.shape)
        save_image(make3DSkel(result.data.cpu().numpy() * 4, (1, 0, 0)),
                   '../exp/vae_3dpose/genInterpolations/interpol_' + str(i) + '.png',
                   nrow=(interpol_length + 1) // 3)
def genRecon():
    """Save original-vs-reconstructed 3D skeleton grids for the test loader."""
    model = torch.load(args.loadModel)
    for batch_idx, (_, heatmaps, reg3d) in enumerate(test_loader):
        # Build (N, 16, 3) points: heatmap (x, y) plus rescaled depth.
        joints2d = getPreds(heatmaps.cpu().numpy())
        depth = reg3d[:, :, 2].cpu().numpy().reshape(-1, ref.nJoints, 1)
        points = np.concatenate([joints2d, (depth + 1) / 8. * 256], axis=2)
        data = Variable(torch.FloatTensor(points))
        if args.cuda:
            data = data.cuda()
        recon_batch, mu, logvar = model(data)
        n = min(data.size(0), 2)
        # Ground truth in blue, reconstruction in red.
        originals = make3DSkel(data[:n].data.cpu().numpy() * 4, (0, 0, 1))
        recons = make3DSkel(recon_batch[:n].data.cpu().numpy() * 4, (1, 0, 0))
        comparison = torch.cat((originals, recons))
        save_image(comparison,
                   '../exp/vae_3dpose/val_results/reconstruction_' + str(batch_idx) + '.png',
                   nrow=n)
        print("Saving reconstruction results for epoch progress : {0:.0f}".format(100*batch_idx / len(test_loader)))
def train(epoch):
    """One training epoch for the 2D-pose VAE.

    Extracts joint coordinates from heatmaps, optimizes the VAE loss, and
    periodically logs progress, saves reconstruction grids, and checkpoints
    the model.  Uses module-level globals: ``model``, ``train_loader``,
    ``optimizer``, ``args``, ``saveDir_models``.
    """
    model.train()
    train_loss = 0
    for batch_idx, (_, poseMap) in enumerate(train_loader):
        # Heatmaps -> (N, 16, 2) joint coordinates.
        pts = torch.FloatTensor(getPreds(poseMap.cpu().numpy()))
        data = Variable(pts)
        if args.cuda:
            data = data.cuda()
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.data[0]
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.data[0] / len(data)))
        # if(batch_idx/len(train_loader) > 0.01):
        #     break
        if batch_idx % args.save_interval == 0:
            # Save originals (blue) above reconstructions (red); *4 rescales
            # joint coordinates to image pixels.
            n = min(data.size(0), 4)
            orig, recon = makeSkel(data[:n] * 4, (0, 0, 255)), makeSkel(
                recon_batch[:n] * 4, (255, 0, 0))
            comparison = torch.cat((orig, recon))
            save_image(comparison,
                       '../exp/vae_pose/results/reconstruction_' + str(epoch)
                       + "_" + str(batch_idx) + '.png',
                       nrow=n)
            print(
                "Saving reconstruction results for epoch : {0}, progress : {1:.0f}"
                .format(epoch, 100 * batch_idx / len(train_loader)))
        if batch_idx % args.val_interval == 0:
            # test(epoch)
            # Checkpoint the full model, tagged with epoch + % progress.
            torch.save(
                model,
                os.path.join(
                    saveDir_models, 'model_{0}_{1:.0f}.pth'.format(
                        epoch, 100 * batch_idx / len(train_loader))))
            print("Saving model for epoch : {0}, progress : {1:.0f}".format(
                epoch, 100 * batch_idx / len(train_loader)))
    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader.dataset)))
def main():
    """Run the 3D pose model on one hard-coded demo image and plot the result."""
    opt = opts().parse()
    # Fall back to the pretrained checkpoint when none was supplied.
    if opt.loadModel == 'none':
        model = torch.load(
            '/home/sehgal.n/3d_pose/pytorch-pose-hg-3d/exp/hgreg-3d.pth').cuda()
    else:
        model = torch.load(opt.loadModel).cuda()
    opt.demo = '/scratch/sehgal.n/datasets/synthetic/SYN_RR_amir_180329_0624_G20190212_1843_P2000_A00/images/image_000001.png'
    frame = cv2.imread(opt.demo)
    # HWC uint8 -> normalised NCHW float tensor.
    tensor = torch.from_numpy(frame.transpose(2, 0, 1)).float() / 256.
    tensor = tensor.view(1, tensor.size(0), tensor.size(1), tensor.size(2))
    net_in = torch.autograd.Variable(tensor).float().cuda()
    net_out = model(net_in)
    # 2D joints from the penultimate output, depth channel from the last.
    joints2d = getPreds((net_out[-2].data).cpu().numpy())[0] * 4
    depth = (net_out[-1].data).cpu().numpy().reshape(joints2d.shape[0], 1)
    plot_pose3d(frame, joints2d, depth)
def genRecon():
    """Save original-vs-reconstructed 2D skeleton grids for the test loader."""
    model = torch.load(args.loadModel)
    for batch_idx, (_, heatmaps) in enumerate(test_loader):
        # Heatmaps -> (N, 16, 2) joint coordinates.
        joints = torch.FloatTensor(getPreds(heatmaps.cpu().numpy()))
        data = Variable(joints)
        if args.cuda:
            data = data.cuda()
        recon_batch, mu, logvar = model(data)
        n = min(data.size(0), 4)
        # Ground truth in blue, reconstruction in red; *4 rescales to pixels.
        originals = makeSkel(data[:n] * 4, (0, 0, 255))
        recons = makeSkel(recon_batch[:n] * 4, (255, 0, 0))
        comparison = torch.cat((originals, recons))
        save_image(comparison,
                   '../exp/vae_pose/val_results/reconstruction_' + str(batch_idx) + '.png',
                   nrow=n)
        print("Saving reconstruction results for epoch progress : {0:.0f}".
              format(100 * batch_idx / len(test_loader)))
def main():
    """Demo: estimate 2D/3D pose on ``opt.demo`` and display both views."""
    opt = opts().parse()
    # Use the supplied checkpoint, or the bundled pretrained one.
    checkpoint = opt.loadModel if opt.loadModel != 'none' else 'hgreg-3d.pth'
    model = torch.load(checkpoint).cuda()
    frame = cv2.imread(opt.demo)
    # HWC uint8 -> normalised NCHW float tensor.
    tensor = torch.from_numpy(frame.transpose(2, 0, 1)).float() / 256.
    tensor = tensor.view(1, tensor.size(0), tensor.size(1), tensor.size(2))
    net_in = torch.autograd.Variable(tensor).float().cuda()
    net_out = model(net_in)
    # 2D joints from the penultimate output, depth channel from the last.
    joints2d = getPreds((net_out[-2].data).cpu().numpy())[0] * 4
    depth = (net_out[-1].data).cpu().numpy().reshape(joints2d.shape[0], 1)
    viz = Debugger()
    viz.addImg((tensor[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
    viz.addPoint2D(joints2d, (255, 0, 0))
    viz.addPoint3D(np.concatenate([joints2d, (depth + 1) / 2. * 256], axis=1))
    viz.showImg(pause=True)
    viz.show3D()
def generate(imageName):
    """Run the stage-3 pose model on ``imageName`` (CPU) and visualise joints.

    Preprocesses the image via ``process_image``, loads the checkpoint onto
    the CPU, and shows 2D joints plus a 3D scatter via the Debugger.
    """
    process_image(imageName)
    # map_location keeps the checkpoint tensors on the CPU.
    model = torch.load('../model/Stage3/model_10.pth',
                       map_location=lambda storage, loc: storage)
    img = cv2.imread(imageName)
    # HWC uint8 -> normalised NCHW float tensor.
    input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256.
    input = input.view(1, input.size(0), input.size(1), input.size(2))
    input_var = torch.autograd.Variable(input).float()
    output = model(input_var)
    print(output[-2].data[0][-2].shape)
    # 2D joints from the penultimate output, depth channel from the last;
    # *4 rescales 64x64 heatmap coordinates to the 256x256 image.
    pred = getPreds((output[-2].data).cpu().numpy())[0] * 4
    reg = (output[-1].data).cpu().numpy().reshape(pred.shape[0], 1)
    print(pred, (reg + 1) / 2. * 256)
    debugger = Debugger()
    debugger.addImg(
        (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
    debugger.addPoint2D(pred, (255, 0, 0))
    debugger.addPoint3D(np.concatenate([pred, (reg + 1) / 2. * 256], axis=1))
    debugger.showImg(pause=True)
    debugger.show3D()
'''
def main():
    """Run the 3D pose model over every frame under ``opt.demo`` and save
    2D/3D visualisations to ``../../output/demo/<dir_name>``."""
    # Checkpoints were pickled under Python 2; force latin1 decoding.
    pickle.load = partial(pickle.load, encoding="latin1")
    pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
    opt = opts().parse()
    if opt.loadModel == 'none':
        model = torch.load('../../tr_models/hgreg-3d.pth').cuda()
    else:
        model = torch.load(opt.loadModel).cuda()
    #opt.demo has the path to dir containing frames of demo video
    all_frames = os.listdir(opt.demo)
    n_frames = len(all_frames)
    # Name the output folder after the demo directory.
    dir_name = opt.demo.split('/')[-1]
    save_path = '../../output/demo/' + dir_name
    try:
        os.makedirs(save_path)
    except OSError:
        pass
    for idx, frame in enumerate(all_frames):
        print('processing frame {}'.format(idx))
        frame_img = cv2.imread(opt.demo + '/' + frame)
        # HWC uint8 -> normalised NCHW float tensor.
        tensor = torch.from_numpy(frame_img.transpose(2, 0, 1)).float() / 256.
        tensor = tensor.view(1, tensor.size(0), tensor.size(1), tensor.size(2))
        net_in = torch.autograd.Variable(tensor).float().cuda()
        net_out = model(net_in)
        # 2D joints from penultimate output, depth channel from the last.
        joints2d = getPreds((net_out[-2].data).cpu().numpy())[0] * 4
        depth = (net_out[-1].data).cpu().numpy().reshape(joints2d.shape[0], 1)
        viz = Debugger()
        viz.addImg((tensor[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
        viz.addPoint2D(joints2d, (255, 0, 0))
        viz.addPoint3D(np.concatenate([joints2d, (depth + 1) / 2. * 256], axis=1))
        # debugger.showImg(pause = True)
        viz.saveImg(path=save_path + '/frame{}.jpg'.format(idx))
        viz.save3D(path=save_path + '/frame_p3d{}.jpg'.format(idx))
        print('frame {} done'.format(idx))
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    """One epoch of 2D hourglass training/eval.

    In 'train' mode backpropagates the stacked heatmap loss; in eval mode
    additionally averages each prediction with its horizontally-flipped
    counterpart (test-time augmentation) and collects final predictions.
    Returns ``({'Loss': ..., 'Acc': ...}, preds)``.
    """
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc = AverageMeter(), AverageMeter()
    preds = []
    nIters = len(dataLoader)
    bar = Bar('{}'.format(opt.expID), max=nIters)
    for i, (input, target, target2, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda()
        target_var = torch.autograd.Variable(target).float().cuda()
        target_var2 = torch.autograd.Variable(target2).float().cuda()
        #print( input_var)
        output = model(input_var)
        #print(output[-1].size())
        if opt.DEBUG >= 2:
            # Overlay predicted (red) vs ground-truth (blue) joints;
            # *4 rescales heatmap coordinates to image pixels.
            gt = getPreds(target.cpu().numpy()) * 4
            pred = getPreds((output[opt.nStack - 1].data).cpu().numpy()) * 4
            debugger = Debugger()
            img = (input[0].numpy().transpose(1, 2, 0) * 256).astype(
                np.uint8).copy()
            debugger.addImg(img)
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showAllImg(pause=True)
        # Supervise every hourglass stack against the same heatmap target.
        loss = criterion(output[0], target_var)
        for k in range(1, opt.nStack):
            loss += criterion(output[k], target_var)
        Loss.update(loss.data[0], input.size(0))
        Acc.update(
            Accuracy((output[opt.nStack - 1].data).cpu().numpy(),
                     (target_var.data).cpu().numpy()))
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            # Flip augmentation: mirror the image, run the model again,
            # un-mirror and left/right-shuffle the joints, then average.
            input_ = input.cpu().numpy()
            input_[0] = Flip(input_[0]).copy()
            inputFlip_var = torch.autograd.Variable(
                torch.from_numpy(input_).view(1, input_.shape[1],
                                              ref.inputRes,
                                              ref.inputRes)).float().cuda(
                                                  opt.GPU)
            outputFlip = model(inputFlip_var)
            outputFlip = ShuffleLR(
                Flip((outputFlip[opt.nStack -
                                 1].data).cpu().numpy()[0])).reshape(
                                     1, ref.nJoints, 64, 64)
            output_ = (
                (output[opt.nStack - 1].data).cpu().numpy() + outputFlip) / 2
            # Map heatmap-space predictions back to original image coords.
            preds.append(
                finalPreds(output_, meta['center'], meta['scale'],
                           meta['rotate'])[0])
        Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'.format(
            epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss,
            Acc=Acc, split=split)
        bar.next()
    bar.finish()
    return {'Loss': Loss.avg, 'Acc': Acc.avg}, preds
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    """One epoch of the fusion model (2D heatmaps + weak 3D regression).

    Combines a FusionCriterion loss on the depth-regression head with the
    stacked 2D heatmap losses.  Returns
    ``(Loss.avg, Acc.avg, Mpjpe.avg, Loss3D.avg)``.
    """
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc, Mpjpe, Loss3D = AverageMeter(), AverageMeter(), AverageMeter(
    ), AverageMeter()
    nIters = len(dataLoader)
    bar = Bar('==>', max=nIters)
    for i, (input, target2D, target3D, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda()
        target2D_var = torch.autograd.Variable(target2D).float().cuda()
        target3D_var = torch.autograd.Variable(target3D).float().cuda()
        output = model(input_var)
        # The depth-regression head sits after the nStack heatmap outputs.
        reg = output[opt.nStack]
        if opt.DEBUG >= 2:
            # Overlay predicted (red) vs ground-truth (blue) 2D joints.
            gt = getPreds(target2D.cpu().numpy()) * 4
            pred = getPreds((output[opt.nStack - 1].data).cpu().numpy()) * 4
            debugger = Debugger()
            debugger.addImg(
                (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showImg()
            debugger.saveImg('debug/{}.png'.format(i))
        # FusioCriterion is an Autograd funciton which can be called only once in the forward pass. So it is defined again in every iteration.
        # Don't ask why.
        loss = FusionCriterion(opt.regWeight, opt.varWeight)(reg, target3D_var)
        Loss3D.update(loss.data[0], input.size(0))
        for k in range(opt.nStack):
            loss += criterion(output[k], target2D_var)
        Loss.update(loss.data[0], input.size(0))
        Acc.update(
            Accuracy((output[opt.nStack - 1].data).cpu().numpy(),
                     (target2D_var.data).cpu().numpy()))
        # MPJPE is only defined for samples that carry 3D annotations.
        mpjpe, num3D = MPJPE((output[opt.nStack - 1].data).cpu().numpy(),
                             (reg.data).cpu().numpy(), meta)
        if num3D > 0:
            Mpjpe.update(mpjpe, num3D)
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Loss3D {loss3d.avg:.6f} | Acc {Acc.avg:.6f} | Mpjpe {Mpjpe.avg:.6f} ({Mpjpe.val:.6f})'.format(
            epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss,
            Acc=Acc, split=split, Mpjpe=Mpjpe, loss3d=Loss3D)
        bar.next()
    bar.finish()
    return Loss.avg, Acc.avg, Mpjpe.avg, Loss3D.avg
def poseEstimation(self, tracked_person):
    """Estimate the 2D+3D pose of one tracked person and publish/log results.

    Crops the person's bounding box from the current frame, resizes it to the
    model resolution when needed, runs the pose network (serialised behind
    ``self.lock``), appends the 3D joints to ``tracked_person.person_pose``,
    and optionally publishes the person and saves debug images / pose files.
    """
    person_id = tracked_person.person_id
    try:
        curImage = self.bridge.imgmsg_to_cv2(self.frameInfo.image_frame)
        person_image = curImage[
            int(tracked_person.bbox.top):int(tracked_person.bbox.top +
                                             tracked_person.bbox.height),
            int(tracked_person.bbox.left):int(tracked_person.bbox.left +
                                              tracked_person.bbox.width)]
    except CvBridgeError as e:
        # NOTE(review): person_image stays unbound if the conversion fails;
        # the next statement would raise NameError — confirm intended.
        rospy.logerr(e)
    # Resize input image
    rospy.logdebug("person image shape: {}".format(person_image.shape))
    if person_image.shape != self.image_shape:
        h, w = person_image.shape[0], person_image.shape[1]
        center = torch.FloatTensor((w / 2, h / 2))
        scale = 1.0 * max(h, w)
        res = 256
        input_image = Crop(person_image, center, scale, 0, res)
    else:
        input_image = person_image
    # Feed input image to model
    rospy.loginfo("feeding image to model")
    input = torch.from_numpy(input_image.transpose(2, 0, 1)).float() / 256.
    input = input.view(1, input.size(0), input.size(1), input.size(2))
    input_var = torch.autograd.Variable(input).float().cuda()
    # lock when using model to estimate pose
    self.lock.acquire()
    try:
        output = self.model(input_var)
    finally:
        self.lock.release()
    rospy.logdebug("got output from model")
    # Get 2D pose
    rospy.logdebug("Rendering 2D pose")
    pose2D = getPreds((output[-2].data).cpu().numpy())[0] * 4
    # Get 3D pose
    rospy.logdebug("Rendering 3D pose")
    reg = (output[-1].data).cpu().numpy().reshape(pose2D.shape[0], 1)
    pose3D = np.concatenate([pose2D, (reg + 1) / 2. * 256], axis=1)
    rospy.logdebug("pose 3d shape: {}".format(pose3D.shape))
    for pose in pose3D:
        joint = Point()
        joint.x = pose[0]
        joint.y = pose[1]
        joint.z = pose[2]
        tracked_person.person_pose.append(joint)
    # publish person
    if self.publish_person:
        self.person_pub.publish(tracked_person)
    self.lock.acquire()
    try:
        self.frameInfo.persons.append(tracked_person)
    finally:
        self.lock.release()
    rospy.logdebug("pose3D: \n {}".format(pose3D))
    # Save pose image
    if self.save_pose_image:
        cv2.imwrite(
            pkg_path + '/scripts/debug/original/ogImg_' +
            str(self.frame_id) + '.png', self.cv_image)
        cv2.imwrite(
            pkg_path + '/scripts/debug/input/inputImg_' +
            str(self.frame_id) + '.png', input_image)
        self.debugger.addImg(input_image, imgId=self.frame_id)
        self.debugger.addPoint2D(pose2D, (255, 0, 0), imgId=self.frame_id)
        self.debugger.saveImg(pkg_path + '/scripts/debug/pose/poseImg_' +
                              str(self.frame_id) + '.png',
                              imgId=self.frame_id)
    if self.save_pose_file:
        file_name = pkg_path + '/pose_file/pose_{:04d}.txt'.format(
            self.frame_id)
        # Fix: the Python-2-only `file()` builtin was removed in Python 3;
        # `open()` behaves identically here on both versions.
        with open(file_name, 'w') as outfile:
            np.savetxt(outfile, pose3D, fmt='%-7.2f')
    rospy.loginfo("Person {} processing finished".format(person_id))
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    """One epoch for the stochastic (noise-conditioned, multi-sample) model.

    Draws ``opt.numNoise`` latent noise maps, scores the sampled predictions
    with a maximum-expected-utility rule, and optimizes ``DiscoLoss``.  In
    eval mode averages with a flipped-input forward pass and collects final
    predictions.  Returns ``({'Loss': ..., 'Acc': ...}, preds)``.
    """
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc = AverageMeter(), AverageMeter()
    preds = []
    nIters = len(dataLoader)
    bar = Bar('{}'.format(opt.expID), max=nIters)
    for i, (input, targets, action, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda(opt.GPU)
        target_var = []
        for t in range(len(targets)):
            target_var.append(
                torch.autograd.Variable(targets[t]).float().cuda(opt.GPU))
        # One 64x64 noise map per sample requested.
        z = []
        for k in range(opt.numNoise):
            noise = torch.autograd.Variable(
                torch.randn((input_var.shape[0], 1, 64, 64))).cuda(opt.GPU)
            z.append(noise)
        output, samples = model(input_var, z, action)
        # Select the representative sample/target under the MEU rule.
        pred_sample = maximumExpectedUtility(samples, criterion)
        target = maximumExpectedUtility(target_var, criterion)
        if opt.DEBUG >= 2:
            # Overlay predicted (red) vs ground-truth (blue) joints.
            gt = getPreds(target.cpu().numpy()) * 4
            pred = getPreds((pred_sample.data).cpu().numpy()) * 4
            debugger = Debugger()
            img = (input[0].numpy().transpose(1, 2, 0) * 256).astype(
                np.uint8).copy()
            debugger.addImg(img)
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showAllImg(pause=True)
        loss = DiscoLoss(output, samples, target_var, criterion)
        Loss.update(loss.item(), input.size(0))
        Acc.update(
            Accuracy((pred_sample.data).cpu().numpy(),
                     (target.data).cpu().numpy()))
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            # Flip augmentation: mirror the input, rerun with the same noise,
            # un-mirror + left/right-shuffle the joints, then average.
            input_ = input.cpu().numpy()
            input_[0] = Flip(input_[0]).copy()
            inputFlip_var = torch.autograd.Variable(
                torch.from_numpy(input_).view(1, input_.shape[1],
                                              ref.inputRes,
                                              ref.inputRes)).float().cuda(
                                                  opt.GPU)
            _, samplesFlip = model(inputFlip_var, z, action)
            pred_sample_flip = maximumExpectedUtility(samplesFlip, criterion)
            outputFlip = ShuffleLR(
                Flip((pred_sample_flip.data).cpu().numpy()[0])).reshape(
                    1, ref.nJoints, ref.outputRes, ref.outputRes)
            output_ = old_div(((pred_sample.data).cpu().numpy() + outputFlip),
                              2)
            preds.append(
                finalPreds(output_, meta['center'], meta['scale'],
                           meta['rotate'])[0])
        Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'.format(
            epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss,
            Acc=Acc, split=split)
        bar.next()
    bar.finish()
    return {'Loss': Loss.avg, 'Acc': Acc.avg}, preds
scale = 1.0 * h / mheight new_im = image.resize((int(w / scale), int(h / scale)), Image.ANTIALIAS) new_im.save(filename) new_im.close() #opt = opts().parse() imageName = './images/test3.jpg' #process_image(imageName) model = torch.load('../model/Stage3/model_10.pth', map_location=lambda storage, loc: storage) img = cv2.imread(imageName) print(type(np.array(img))) input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256. input = input.view(1, input.size(0), input.size(1), input.size(2)) input_var = torch.autograd.Variable(input).float() output = model(input_var) pred = getPreds((output[-2].data).cpu().numpy())[0] * 4 reg = (output[-1].data).cpu().numpy().reshape(pred.shape[0], 1) print(pred, (reg + 1) / 2. * 256) debugger = Debugger() debugger.addImg((input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8)) debugger.addPoint2D(pred, (255, 0, 0)) debugger.addPoint3D(np.concatenate([pred, (reg + 1) / 2. * 256], axis=1)) debugger.showImg(pause=True) debugger.show3D()
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    """One epoch of hourglass training/eval with an optional VAE bottleneck.

    When ``model.hgType == 'vae'`` the forward pass also returns latent
    spaces, whose KL terms are added to the stacked heatmap loss.  Returns
    ``(Loss.avg, Acc.avg)``.
    """
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc = AverageMeter(), AverageMeter()
    nIters = len(dataLoader)
    # bar = Bar('==>', max=nIters)
    start_time = time.time()
    for i, (input, target) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda()
        # Fix: `async` became a reserved keyword in Python 3.7, making the
        # original `target.cuda(async=True)` a SyntaxError; PyTorch >= 0.4
        # spells the same asynchronous copy `non_blocking=True`.
        target_var = torch.autograd.Variable(
            target.cuda(non_blocking=True)).float().cuda()
        if (model.hgType == 'vae'):
            output, latentspace = model(input_var)
        else:
            output = model(input_var)
        if opt.DEBUG >= 2:
            # Overlay predicted (red) vs ground-truth (blue) joints;
            # *4 rescales heatmap coordinates to image pixels.
            gt = getPreds(target.cpu().numpy()) * 4
            pred = getPreds((output[opt.nStack - 1].data).cpu().numpy()) * 4
            # init = getPreds(input.numpy()[:, 3:])
            debugger = Debugger()
            img = (input[0].numpy()[:3].transpose(1, 2, 0) * 256).astype(
                np.uint8).copy()
            debugger.addImg(img)
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            # debugger.addPoint2D(init[0], (0, 255, 0))
            debugger.showAllImg(pause=True)
            #debugger.saveImg('debug/{}.png'.format(i))
        # Supervise every stack against the same heatmap target.
        loss = criterion(output[0], target_var)
        for k in range(1, opt.nStack):
            loss += criterion(output[k], target_var)
        if (model.hgType == 'vae'):
            # KL regularisation per stack's latent space.
            for k in range(0, opt.nStack):
                loss += ref.vaeloss_wt * _compute_kl(latentspace[k])
        Loss.update(loss.data[0], input.size(0))
        Acc.update(
            Accuracy((output[opt.nStack - 1].data).cpu().numpy(),
                     (target_var.data).cpu().numpy()))
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'.format(epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss, Acc=Acc, split = split)
        # bar.next()
        curr_time = time.time()
        print(
            '{split} Epoch: [{0}][{1}/{2}]| Total: {total:f} | ETA: {eta:f} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'
            .format(epoch,
                    i,
                    nIters,
                    total=curr_time - start_time,
                    eta=(curr_time - start_time) * (nIters - i + 1) / (i + 1),
                    loss=Loss,
                    Acc=Acc,
                    split=split))
    # bar.finish()
    return Loss.avg, Acc.avg
def train(epoch):
    """One training epoch for the combined image/pose VAE.

    Optimizes image + pose reconstruction, a symmetric latent-similarity
    term (with detached targets so gradients flow one way per branch), and
    per-branch KL losses.  Periodically saves a comparison grid: target img,
    recon img, input pose, recon pose, img->pose and pose->img transfers.
    Uses module-level globals: ``model``, ``train_loader``, ``criterion``,
    ``optimizer``, ``opt``, ``save_interval``, ``saveDir_results``.
    """
    model.train()
    nIters = len(train_loader)
    # bar = Bar('==>', max=nIters)
    Loss, Acc = AverageMeter(), AverageMeter()
    start_time = time.time()
    for i, (inp_img, down_img, pose) in enumerate(train_loader):
        input_img = Variable(inp_img).float().cuda()
        target_img = Variable(down_img).float().cuda()
        # 16 joints x 2 coordinates flattened to a 32-vector per sample.
        input_pose = Variable(
            torch.FloatTensor(getPreds(pose.cpu().numpy())).view(
                -1, 32)).float().cuda()
        recon_img, recon_pose, h_img, h_pose, h_intermed_pose = model(
            input_img, input_pose)
        ll_loss_img = criterion(recon_img, target_img)
        ll_loss_pose = criterion(recon_pose, input_pose)
        # Detach each latent when used as the other's target so the
        # similarity gradient only flows into one branch at a time.
        h_img_copy, h_pose_copy = h_img.detach(), h_pose.detach()
        dissim_loss = F.mse_loss(h_img, h_pose_copy) + F.mse_loss(
            h_pose, h_img_copy)
        kl_loss_img = _compute_kl(h_img)
        kl_loss_pose = _compute_kl(h_intermed_pose)
        # add kl-div loss for each ae to get vaes
        # add kl-div loss for dissimilarity
        total_loss = ll_loss_img * opt.img_recon_wt + ll_loss_pose * opt.pose_recon_wt + dissim_loss * opt.dissim_wt + opt.kl_img_wt * kl_loss_img + opt.kl_pose_wt * kl_loss_pose
        Loss.update(total_loss.data[0], inp_img.size(0))
        # Accuracy of pose regressed from the *image* latent.
        img_to_pose = model.forward_i_to_p(h_img)
        Acc.update(
            Accuracy_Reg((img_to_pose.data.view(-1, 16, 2)).cpu().numpy(),
                         (input_pose.data.view(-1, 16, 2)).cpu().numpy()))
        del img_to_pose
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        curr_time = time.time()
        print(
            '{split} Epoch: [{0}][{1}/{2}]| Total: {total:f} | ETA: {eta:f} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f} )'
            .format(epoch,
                    i,
                    nIters,
                    total=curr_time - start_time,
                    eta=(curr_time - start_time) * (nIters - i + 1) / (i + 1),
                    loss=Loss,
                    Acc=Acc,
                    split='train'))
        if i % save_interval == 0:
            n = min(input_img.size(0), 4)
            # Grid rows: target image, reconstructed image, input pose,
            # reconstructed pose (poses rendered as skeleton images).
            orig_i, recon_i, orig_p, recon_p = target_img[:n].data, recon_img[:n].data, makeSkel_LR_64(
                input_pose[:n].data), makeSkel_LR_64(recon_pose[:n].data)
            # Cross-modal transfers for the same samples.
            img_to_pose = model.forward_i_to_p(h_img[:n])
            pose_to_img = model.forward_p_to_i(h_pose[:n])
            i_to_p, p_to_i = makeSkel_LR_64(img_to_pose.data), pose_to_img.data
            comparison = torch.cat(
                [orig_i, recon_i, orig_p, recon_p, i_to_p, p_to_i])
            save_image(comparison,
                       saveDir_results + 'reconstruction_' + str(epoch) + "_"
                       + str(i) + '.png',
                       nrow=n)
            print("Saving results for epoch : {0}, progress : {1:.0f}".format(
                epoch, 100 * i / len(train_loader)))
criterion = torch.nn.MSELoss() # combined vae model model = torch.load(opt.loadModel).cuda() Loss, Acc = AverageMeter(), AverageMeter() nIters = len(val_loader) start_time = time.time() for i, (inp_img, down_img, pose) in enumerate(val_loader): model.eval() input_img = Variable(inp_img).float().cuda() target_img = Variable(down_img).float().cuda() input_pose = Variable( torch.FloatTensor(getPreds(pose.cpu().numpy())).view( -1, 32)).float().cuda() recon_img, recon_pose, h_img, h_pose = model(input_img, input_pose) ll_loss_img = criterion(recon_img, target_img) ll_loss_pose = criterion(recon_pose, input_pose) dissim_loss = F.mse_loss(h_img, h_pose) total_loss = ll_loss_img * opt.img_recon_wt + ll_loss_pose * opt.pose_recon_wt + dissim_loss * opt.dissim_wt Loss.update(total_loss.data[0], inp_img.size(0)) img_to_pose = model.forward_i_to_p(h_img) Acc.update( Accuracy_Reg((img_to_pose.data.view(-1, 16, 2)).cpu().numpy(), (input_pose.data.view(-1, 16, 2)).cpu().numpy())) del img_to_pose