def calc_poses(self):
    pred_poses = np.zeros((self.config.dataset_length, 7))
    targ_poses = np.zeros((self.config.dataset_length, 7))
    for batch_idx, (data, target) in enumerate(self.dataloader):
        if batch_idx % 10 == 0:
            print('Image {:d} / {:d}'.format(
                batch_idx * self.config.batch_size, self.config.dataset_length))

        # indices of this batch in the global pose arrays
        tail_idx = min(self.config.dataset_length,
                       (batch_idx + 1) * self.config.batch_size)
        idx = list(range(batch_idx * self.config.batch_size, tail_idx))

        # output : 1 x 6 (becomes 1 x 7 after qexp)
        output = self.step_feedfwd(data=data, model=self.model)
        size = output.size()
        output = output.cpu().data.numpy().reshape((-1, size[-1]))
        target = target.numpy().reshape((-1, size[-1]))

        # normalize the predicted and target quaternions
        q = [qexp(p[3:]) for p in output]
        output = np.hstack((output[:, :3], np.asarray(q)))
        q = [qexp(p[3:]) for p in target]
        target = np.hstack((target[:, :3], np.asarray(q)))

        # un-normalize the predicted and target translations
        output[:, :3] = (output[:, :3] * self.config.pose_s) + self.config.pose_m
        target[:, :3] = (target[:, :3] * self.config.pose_s) + self.config.pose_m

        pred_poses[idx, :] = output
        targ_poses[idx, :] = target
    return pred_poses, targ_poses
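
# `qexp` is used throughout these snippets but not defined in them. It is
# assumed to map the 3-vector log quaternion predicted by the network to a
# unit quaternion (w, x, y, z); a minimal sketch under that convention:
import numpy as np

def qexp(q):
    # exponential map of a pure quaternion (0, q):
    # w = cos(|q|), (x, y, z) = sin(|q|) / |q| * q
    n = np.linalg.norm(q)
    return np.hstack((np.cos(n), np.sinc(n / np.pi) * q))
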
def estimation(self, img):
    # activate GPUs
    CUDA = torch.cuda.is_available()
    torch.manual_seed(self.seed)
    if CUDA:
        torch.cuda.manual_seed(self.seed)
        self.eval_net.cuda()

    cv2.imshow('Raw Image',
               cv2.resize(img, (img.shape[1], img.shape[0]),
                          interpolation=cv2.INTER_LINEAR))
    cv2.waitKey(1)

    # transform image from array to PIL image, then to a network-ready tensor
    img = PIL_Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    img = self.transform(img)

    if self.model.find('mapnet') >= 0:
        # keep a sliding window of the most recent frames
        if len(self.tmp_img) > 2 * self.skip:
            self.tmp_img.pop(0)
        self.tmp_img.append(img)

        # sample STEPS frames from the window, `skip` frames apart
        skips = self.skip * np.ones(self.steps - 1)
        offsets = np.insert(skips, 0, 0).cumsum()
        offsets -= offsets[-1]
        offsets = offsets.astype(int)
        if self.idx > 2 * self.skip:
            index = 2 * self.skip + offsets
        else:
            index = self.idx + offsets
        index = np.minimum(np.maximum(index, 0), len(self.tmp_img) - 1)
        clip = [self.tmp_img[i] for i in index]
        img = torch.stack(clip, dim=0)

    img = img.unsqueeze(0)

    # output : 1 x 6 or 1 x STEPS x 6
    _, pose = step_feedfwd(img, self.eval_net, CUDA, train=False)
    s = pose.size()
    pose = pose.cpu().data.numpy().reshape((-1, s[-1]))

    # normalize the predicted quaternions
    q = [qexp(p[3:]) for p in pose]
    pose = np.hstack((pose[:, :3], np.asarray(q)))

    # un-normalize the predicted translations
    pose[:, :3] = pose[:, :3] * self.max_value

    # for MapNet take the pose of the most recent frame in the clip
    if self.model.find('mapnet') >= 0:
        pred_pose = pose[-1]
    else:
        pred_pose = pose[0]
    self.idx += 1
    return pred_pose
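
# `step_feedfwd` is also undefined in these snippets. At evaluation time
# (train=False) it only needs to run a forward pass with gradients disabled
# and return the prediction; a minimal sketch assuming the
# (data, model, cuda, train) call signature used above:
import torch

def step_feedfwd(data, model, cuda, train=False):
    with torch.set_grad_enabled(train):
        data = data.cuda(non_blocking=True) if cuda else data
        output = model(data)
    # the loss is irrelevant for inference, so return a dummy 0 alongside it
    return 0, output
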
# indices into the global arrays storing poses
if (args.model.find('vid') >= 0) or args.pose_graph:
    idx = data_set.get_indices(batch_idx)
else:
    idx = [batch_idx]
idx = idx[len(idx) // 2]

# output : 1 x 6 or 1 x STEPS x 6
_, output = step_feedfwd(data, model, CUDA, train=False)
s = output.size()
output = output.cpu().data.numpy().reshape((-1, s[-1]))
target = target.numpy().reshape((-1, s[-1]))

# normalize the predicted and target quaternions
q = [qexp(p[3:]) for p in output]
output = np.hstack((output[:, :3], np.asarray(q)))
q = [qexp(p[3:]) for p in target]
target = np.hstack((target[:, :3], np.asarray(q)))

if args.pose_graph:  # do pose graph optimization
    kwargs = {'sax': sax, 'saq': saq, 'srx': srx, 'srq': srq}
    # target includes both absolute poses and vos
    vos = target[len(output):]
    target = target[:len(output)]
    output = optimize_poses(pred_poses=output, vos=vos, fc_vos=fc_vos,
                            **kwargs)

# un-normalize the predicted and target translations
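# (the un-normalization itself is cut off in this fragment; following the
#  other snippets it would scale and shift the translations back to metric
#  units, e.g.)
output[:, :3] = (output[:, :3] * pose_s) + pose_m
target[:, :3] = (target[:, :3] * pose_s) + pose_m
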
if batch_idx % 200 == 0:
    print('Image {:d} / {:d}'.format(batch_idx, len(loader)))

# indices into the global arrays storing poses
if (args.model.find('vid') >= 0) or args.pose_graph:
    idx = data_set.get_indices(batch_idx)
else:
    idx = [batch_idx]
idx = idx[len(idx) // 2]

# output : 1 x 6 or 1 x STEPS x 6
_, output = step_feedfwd(data, model, CUDA, train=False)
s = output.size()
output = output.cpu().data.numpy().reshape((-1, s[-1]))

# normalize the predicted quaternions
q = [qexp(p[3:]) for p in output]
output = np.hstack((output[:, :3], np.asarray(q)))

# un-normalize the predicted translations
output[:, :3] = (output[:, :3] * pose_s) + pose_m

# take the middle prediction
pred_poses[idx, :] = output[len(output) // 2]

with open('logs/result_{}_{}.txt'.format(args.dataset, args.model), 'w') as f:
    for fn, pred_pose in zip(fnames, pred_poses):
        f.write('{} {}\n'.format(
            fn, ' '.join(['{:.6f}'.format(x) for x in pred_pose])))
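
# A possible way to read the result file back, assuming the
# "<filename> <x> <y> <z> <qw> <qx> <qy> <qz>" layout written above:
with open('logs/result_{}_{}.txt'.format(args.dataset, args.model)) as f:
    rows = [line.split() for line in f]
fnames_loaded = [r[0] for r in rows]
poses_loaded = np.asarray([[float(x) for x in r[1:]] for r in rows])
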
idx = idx[len(idx) // 2]

with torch.set_grad_enabled(False):
    data_var = Variable(data, requires_grad=False)
    if CUDA:
        data_var = data_var.cuda(non_blocking=True)
    output = model.__feature_vector__(data_var)
    if args.model == 'multitask':
        output = output[0]
vector = output.detach().cpu().numpy()
if len(vector.shape) > 1:
    vector = vector[vector.shape[0] // 2]
feature_vectors.append(vector)
distance.append(np.linalg.norm(vector))

target = target[0]
target = target.numpy().reshape((-1, 6))
q = [qexp(p[3:]) for p in target]
target = np.hstack((target[:, :3], np.asarray(q)))
target[:, :3] = (target[:, :3] * pose_s) + pose_m
targ_poses[idx, :] = target[len(target) // 2]

# after the loop over the loader: stack the features and embed them with t-SNE
feature_vectors = np.vstack(feature_vectors)
# distance = np.stack(distance)
distance = np.stack([np.linalg.norm(targ_poses[i, :3])
                     for i in range(targ_poses.shape[0])])
print(feature_vectors.shape)
t1 = time.time()
embedding = TSNE(n_components=2).fit_transform(feature_vectors)
t = time.time() - t1
print('TSNE took %d seconds' % t)
print(embedding.shape)
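
# A possible way to visualize the 2-D t-SNE embedding computed above, coloring
# each point by the corresponding entry of `distance` (this assumes `embedding`
# and `distance` have one entry per test image):
plt.figure()
plt.scatter(embedding[:, 0], embedding[:, 1], c=distance, cmap='viridis', s=5)
plt.colorbar(label='distance of target pose from origin (m)')
plt.show()
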
# loader
batch_size = 25
loader = DataLoader(dset, batch_size=batch_size, shuffle=False, num_workers=4)

# collect poses and losses
real_pose = np.empty((0, 6))
gt_pose = np.empty((0, 6))
for (rp, gp) in loader:
    assert len(rp) == len(gp)
    real_pose = np.vstack((real_pose, rp.numpy()))
    gt_pose = np.vstack((gt_pose, gp.numpy()))

# un-normalize and convert to quaternion
real_pose[:, :3] = (real_pose[:, :3] * pose_s) + pose_m
gt_pose[:, :3] = (gt_pose[:, :3] * pose_s) + pose_m
q = [qexp(p[3:]) for p in real_pose]
real_pose = np.hstack((real_pose[:, :3], np.asarray(q)))
q = [qexp(p[3:]) for p in gt_pose]
gt_pose = np.hstack((gt_pose[:, :3], np.asarray(q)))

# visualization loop
T = np.asarray([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
plt.subplots_adjust(left=0, bottom=0, right=1, top=1)
for r, g in zip(real_pose[::args.subsample], gt_pose[::args.subsample]):
    ax.scatter(r[0], r[1], zs=r[2], c='r')  # estimated pose in red
    ax.scatter(g[0], g[1], zs=g[2], c='g')  # ground-truth pose in green
    pp = np.vstack((r, g))
    ax.plot(pp[:, 0], pp[:, 1], zs=pp[:, 2], c='b')  # line joining the pair
ax.view_init(azim=-137, elev=52)
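
# A possible way to quantify the gap the plot shows, assuming real_pose holds
# the estimated poses and gt_pose the ground truth (both N x 7 after the
# conversion above):
t_err = np.linalg.norm(real_pose[:, :3] - gt_pose[:, :3], axis=1)
dots = np.abs(np.sum(real_pose[:, 3:] * gt_pose[:, 3:], axis=1))
q_err = 2 * np.degrees(np.arccos(np.clip(dots, -1.0, 1.0)))
print('Median error: {:.2f} m, {:.2f} deg'.format(np.median(t_err),
                                                  np.median(q_err)))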