Example #1
    def calc_poses(self):
        pred_poses = np.zeros((self.config.dataset_length, 7))
        targ_poses = np.zeros((self.config.dataset_length, 7))

        for batch_idx, (data, target) in enumerate(self.dataloader):
            if batch_idx % 10 == 0:
                print('Image {:d} / {:d}'.format(
                    batch_idx * self.config.batch_size,
                    self.config.dataset_length))
            tail_idx = min(
                self.config.dataset_length,
                (batch_idx + 1) * self.config.batch_size
            )
            idx = list(range(batch_idx * self.config.batch_size, tail_idx))

            output = self.step_feedfwd(
                data=data,
                model=self.model
            )
            # output: N x 6 (translation + log quaternion)
            size = output.size()
            output = output.cpu().data.numpy().reshape((-1, size[-1]))
            target = target.numpy().reshape((-1, size[-1]))

            q = [qexp(p[3:]) for p in output]
            output = np.hstack((output[:, :3], np.asarray(q)))
            q = [qexp(p[3:]) for p in target]
            target = np.hstack((target[:, :3], np.asarray(q)))

            output[:, :3] = (output[:, :3] * self.config.pose_s) + self.config.pose_m
            target[:, :3] = (target[:, :3] * self.config.pose_s) + self.config.pose_m

            pred_poses[idx, :] = output
            targ_poses[idx, :] = target

        return pred_poses, targ_poses
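
All of these snippets convert the network's 3-vector log-quaternion output back to a unit quaternion with `qexp`. A minimal sketch of that helper, assuming the standard quaternion exponential map (the actual implementation in the source repository may differ):

    import numpy as np

    def qexp(q):
        # exponential map: 3-vector log quaternion -> unit quaternion
        # np.sinc(x) = sin(pi*x)/(pi*x), so np.sinc(n/pi) = sin(n)/n
        n = np.linalg.norm(q)
        return np.hstack((np.cos(n), np.sinc(n / np.pi) * q))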
Example #2
    def estimation(self, img):
        # activate GPUs
        CUDA = torch.cuda.is_available()
        torch.manual_seed(self.seed)
        if CUDA:
            torch.cuda.manual_seed(self.seed)
            self.eval_net.cuda()

        cv2.imshow(
            'Raw Image',
            cv2.resize(img, (img.shape[1], img.shape[0]),
                       interpolation=cv2.INTER_LINEAR))
        cv2.waitKey(1)

        # Transform image from array to PIL image
        img = PIL_Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        img = self.transform(img)

        if self.model.find('mapnet') >= 0:
            if len(self.tmp_img) > 2 * self.skip:
                self.tmp_img.pop(0)
            self.tmp_img.append(img)

            skips = self.skip * np.ones(self.steps - 1)
            offsets = np.insert(skips, 0, 0).cumsum()
            offsets -= offsets[-1]
            offsets = offsets.astype(int)

            if self.idx > 2 * self.skip:
                index = 2 * self.skip + offsets
            else:
                index = self.idx + offsets

            index = np.minimum(np.maximum(index, 0), len(self.tmp_img) - 1)
            clip = [self.tmp_img[i] for i in index]
            img = torch.stack(clip, dim=0)

        img = img.unsqueeze(0)
        # output : 1 x 6 or 1 x STEPS x 6
        _, pose = step_feedfwd(img, self.eval_net, CUDA, train=False)
        s = pose.size()
        pose = pose.cpu().data.numpy().reshape((-1, s[-1]))

        # normalize the predicted quaternions
        q = [qexp(p[3:]) for p in pose]
        pose = np.hstack((pose[:, :3], np.asarray(q)))

        # un-normalize the predicted and target translations
        pose[:, :3] = pose[:, :3] * self.max_value
        if self.model.find('mapnet') >= 0:
            pred_pose = pose[-1]
        else:
            pred_pose = pose[0]
        self.idx += 1

        return pred_pose
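
The sliding-window indexing above is easier to follow with concrete numbers. For illustration, assume `steps = 3` and `skip = 10` (hypothetical values):

    import numpy as np

    steps, skip = 3, 10
    skips = skip * np.ones(steps - 1)          # [10., 10.]
    offsets = np.insert(skips, 0, 0).cumsum()  # [ 0., 10., 20.]
    offsets -= offsets[-1]                     # [-20., -10., 0.]
    offsets = offsets.astype(int)
    print(offsets)                             # [-20 -10   0]
    # anchored at the current frame, the clip samples frames
    # idx-20, idx-10 and idx, clamped to the buffer bounds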
Example #3
    # indices into the global arrays storing poses
    if (args.model.find('vid') >= 0) or args.pose_graph:
        idx = data_set.get_indices(batch_idx)
    else:
        idx = [batch_idx]
    idx = idx[len(idx) // 2]

    # output : 1 x 6 or 1 x STEPS x 6
    _, output = step_feedfwd(data, model, CUDA, train=False)
    s = output.size()
    output = output.cpu().data.numpy().reshape((-1, s[-1]))
    target = target.numpy().reshape((-1, s[-1]))

    # normalize the predicted quaternions
    q = [qexp(p[3:]) for p in output]
    output = np.hstack((output[:, :3], np.asarray(q)))
    q = [qexp(p[3:]) for p in target]
    target = np.hstack((target[:, :3], np.asarray(q)))

    if args.pose_graph:  # do pose graph optimization
        kwargs = {'sax': sax, 'saq': saq, 'srx': srx, 'srq': srq}
        # target includes both absolute poses and vos
        vos = target[len(output):]
        target = target[:len(output)]
        output = optimize_poses(pred_poses=output,
                                vos=vos,
                                fc_vos=fc_vos,
                                **kwargs)

    # un-normalize the predicted and target translations
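
The snippet is cut off at the un-normalization step; based on Examples #1 and #4, it presumably continues along these lines, with `pose_s` and `pose_m` being the per-dataset pose scale and mean:

    output[:, :3] = (output[:, :3] * pose_s) + pose_m
    target[:, :3] = (target[:, :3] * pose_s) + pose_m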
Example #4
        if batch_idx % 200 == 0:
            print('Image {:d} / {:d}'.format(batch_idx, len(loader)))

        # indices into the global arrays storing poses
        if (args.model.find('vid') >= 0) or args.pose_graph:
            idx = data_set.get_indices(batch_idx)
        else:
            idx = [batch_idx]
        idx = idx[len(idx) // 2]

        # output : 1 x 6 or 1 x STEPS x 6
        _, output = step_feedfwd(data, model, CUDA, train=False)
        s = output.size()
        output = output.cpu().data.numpy().reshape((-1, s[-1]))

        # normalize the predicted quaternions
        q = [qexp(p[3:]) for p in output]
        output = np.hstack((output[:, :3], np.asarray(q)))

        # un-normalize the predicted and target translations
        output[:, :3] = (output[:, :3] * pose_s) + pose_m

        # take the middle prediction
        pred_poses[idx, :] = output[len(output) // 2]

    with open('logs/result_{}_{}.txt'.format(args.dataset, args.model),
              'w') as f:
        for fn, pred_pose in zip(fnames, pred_poses):
            f.write('{} {}\n'.format(
                fn, ' '.join(['{:.6f}'.format(x) for x in pred_pose])))
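
Several snippets rely on a `step_feedfwd` helper that runs one forward pass and returns a `(loss, output)` pair. A minimal sketch under that assumption; the loss is omitted here because the evaluation code above discards it, and the real helper presumably also supports a training step:

    import torch

    def step_feedfwd(data, model, cuda, train=False):
        # single forward pass; returns (loss, output)
        if cuda:
            data = data.cuda(non_blocking=True)
        with torch.set_grad_enabled(train):
            output = model(data)
        return None, output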
Example #5
            idx = idx[len(idx) // 2]
            with torch.set_grad_enabled(False):
                data_var = Variable(data, requires_grad=False)
                if CUDA:
                    data_var = data_var.cuda(non_blocking=True)
                output = model.__feature_vector__(data_var)
                if args.model == 'multitask':
                    output = output[0]
                vector = output.detach().cpu().numpy()
                if len(vector.shape) > 1:
                    vector = vector[vector.shape[0]//2]
                feature_vectors.append(vector)
                distance.append(np.linalg.norm(vector))
            target = target[0]
            target = target.numpy().reshape((-1, 6))
            q = [qexp(p[3:]) for p in target]
            target = np.hstack((target[:, :3], np.asarray(q)))
            target[:, :3] = (target[:, :3] * pose_s) + pose_m
            targ_poses[idx, :] = target[len(target) // 2]

        feature_vectors = np.vstack(feature_vectors)

        #distance = np.stack(distance)
        distance = np.stack([np.linalg.norm(targ_poses[i, :3]) for i in range(targ_poses.shape[0])])
        print(feature_vectors.shape)
        t1 = time.time()
        embedding = TSNE(n_components=2).fit_transform(feature_vectors)
        t = time.time() - t1
        print('TSNE took %d seconds' % t)
        print(embedding.shape)
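
The 2-D t-SNE embedding is usually inspected as a scatter plot; a possible follow-up (not part of the original snippet), coloring each point by the `distance` array computed above:

    import matplotlib.pyplot as plt

    plt.figure()
    sc = plt.scatter(embedding[:, 0], embedding[:, 1], c=distance, s=5)
    plt.colorbar(sc, label='target pose distance from origin')
    plt.title('t-SNE embedding of feature vectors')
    plt.show()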
Example #6
# loader
batch_size = 25
loader = DataLoader(dset, batch_size=batch_size, shuffle=False, num_workers=4)

# collect poses and losses
real_pose = np.empty((0, 6))
gt_pose = np.empty((0, 6))
for (rp, gp) in loader:
    assert len(rp) == len(gp)
    real_pose = np.vstack((real_pose, rp.numpy()))
    gt_pose = np.vstack((gt_pose, gp.numpy()))

# un-normalize and convert to quaternion
real_pose[:, :3] = (real_pose[:, :3] * pose_s) + pose_m
gt_pose[:, :3] = (gt_pose[:, :3] * pose_s) + pose_m
q = [qexp(p[3:]) for p in real_pose]
real_pose = np.hstack((real_pose[:, :3], np.asarray(q)))
q = [qexp(p[3:]) for p in gt_pose]
gt_pose = np.hstack((gt_pose[:, :3], np.asarray(q)))

# visualization loop
T = np.asarray([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
plt.subplots_adjust(left=0, bottom=0, right=1, top=1)
for r, g in zip(real_pose[::args.subsample], gt_pose[::args.subsample]):
    ax.scatter(r[0], r[1], zs=r[2], c='r')
    ax.scatter(g[0], g[1], zs=g[2], c='g')
    pp = np.vstack((r, g))
    ax.plot(pp[:, 0], pp[:, 1], zs=pp[:, 2], c='b')
    ax.view_init(azim=-137, elev=52)
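
The loop only adds artists to `ax`; nothing appears until the figure is rendered, e.g. with a final (assumed) call such as:

    plt.show()  # or: fig.savefig('poses.png', bbox_inches='tight')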