Example 1
    def estimation(self, img):
        # activate GPUs
        CUDA = torch.cuda.is_available()
        torch.manual_seed(self.seed)
        if CUDA:
            torch.cuda.manual_seed(self.seed)
            self.eval_net.cuda()

        # show the raw frame (resizing to its own size is effectively a no-op)
        cv2.imshow(
            'Raw Image',
            cv2.resize(img, (img.shape[1], img.shape[0]),
                       interpolation=cv2.INTER_LINEAR))
        cv2.waitKey(1)

        # Transform image from array to PIL image
        img = PIL_Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        img = self.transform(img)

        if self.model.find('mapnet') >= 0:
            # keep a sliding window of the last 2 * skip + 1 frames
            if len(self.tmp_img) > 2 * self.skip:
                self.tmp_img.pop(0)
            self.tmp_img.append(img)

            # frame offsets relative to the current frame:
            # [-(steps - 1) * skip, ..., -skip, 0]
            skips = self.skip * np.ones(self.steps - 1)
            offsets = np.insert(skips, 0, 0).cumsum()
            offsets -= offsets[-1]
            offsets = offsets.astype(int)

            if self.idx > 2 * self.skip:
                index = 2 * self.skip + offsets
            else:
                index = self.idx + offsets

            index = np.minimum(np.maximum(index, 0), len(self.tmp_img) - 1)
            clip = [self.tmp_img[i] for i in index]
            img = torch.stack(clip, dim=0)

        img = img.unsqueeze(0)
        # output : 1 x 6 or 1 x STEPS x 6
        _, pose = step_feedfwd(img, self.eval_net, CUDA, train=False)
        s = pose.size()
        pose = pose.cpu().data.numpy().reshape((-1, s[-1]))

        # normalize the predicted quaternions
        q = [qexp(p[3:]) for p in pose]
        pose = np.hstack((pose[:, :3], np.asarray(q)))

        # un-normalize the predicted and target translations
        pose[:, :3] = pose[:, :3] * self.max_value
        if self.model.find('mapnet') >= 0:
            pred_pose = pose[-1]
        else:
            pred_pose = pose[0]
        self.idx += 1

        return pred_pose
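
Both examples normalize the network's rotation output with a qexp helper that
is not shown here. MapNet parameterizes rotations as 3-D log quaternions, and
qexp is the exponential map back to a unit quaternion. A minimal sketch
consistent with that usage (the exact implementation is an assumption, since
the helper is defined elsewhere in the repository):

import numpy as np

def qexp(q):
    # exponential map: 3-vector log quaternion -> unit quaternion (w, x, y, z)
    # np.sinc(n / pi) equals sin(n) / n, so the result has unit norm
    n = np.linalg.norm(q)
    return np.hstack((np.cos(n), np.sinc(n / np.pi) * q))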
Example 2
targ_poses = np.zeros((L, 7))  # store all target poses

# inference loop
for batch_idx, (data, target) in enumerate(loader):
    if batch_idx % 200 == 0:
        print('Image {:d} / {:d}'.format(batch_idx, len(loader)))

    # indices into the global arrays storing poses
    if (args.model.find('vid') >= 0) or args.pose_graph:
        idx = data_set.get_indices(batch_idx)
    else:
        idx = [batch_idx]
    idx = idx[len(idx) // 2]  # middle frame of the window

    # output : 1 x 6 or 1 x STEPS x 6
    _, output = step_feedfwd(data, model, CUDA, train=False)
    s = output.size()
    output = output.cpu().data.numpy().reshape((-1, s[-1]))
    target = target.numpy().reshape((-1, s[-1]))

    # normalize the predicted quaternions
    q = [qexp(p[3:]) for p in output]
    output = np.hstack((output[:, :3], np.asarray(q)))
    q = [qexp(p[3:]) for p in target]
    target = np.hstack((target[:, :3], np.asarray(q)))

    if args.pose_graph:  # do pose graph optimization
        kwargs = {'sax': sax, 'saq': saq, 'srx': srx, 'srq': srq}
        # target includes both absolute poses and vos
        vos = target[len(output):]
        target = target[:len(output)]
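
Both examples above (and Example 3 below) route the forward pass through
step_feedfwd, which is defined elsewhere in the repository. Judging only from
the call sites, it moves the batch to the GPU when available, runs the model
(optionally scoring it against a target), and returns a (loss, output) pair.
A minimal sketch under those assumptions; the real helper also takes extra
flags such as activation_maps (used in Example 3) that are not modeled here:

import torch

def step_feedfwd(data, model, cuda, target=None, criterion=None, train=True):
    # hypothetical reconstruction from the call sites; not the original code
    if cuda:
        data = data.cuda()
    with torch.set_grad_enabled(train):
        output = model(data)
    loss = None
    if criterion is not None and target is not None:
        if cuda:
            target = target.cuda()
        loss = criterion(output, target)
    return loss, output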
Example 3
    def forward(self, data, target, criterion, retain_graph=False):
        """
        Args:
            input: input image with shape of (1, 3, H, W)
            class_idx (int): class index for calculating GradCAM.
                    If not specified, the class index that makes the highest model prediction score will be used.
        Return:
            mask: saliency map of the same spatial dimension with input
            logit: model output
        """
        b, c, h, w = data.size()
        score, output = step_feedfwd(data,
                                     self.model_arch,
                                     torch.cuda.is_available(),
                                     criterion=criterion,
                                     target=target,
                                     train=True,
                                     activation_maps=True)
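        # Legacy manual forward/backward pass (pre-0.4 PyTorch Variable API,
        # including cuda(async=True)), kept commented out below for reference: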
        """
        data_var = Variable(data, requires_grad=False)
        if torch.cuda.is_available():
            data_var = data_var.cuda(async=True)
        output = self.model_arch(data_var)
        ##WARNING: Backpropagation on loss not classification
        dual_target = type(target) is list or type(target) is tuple
        if torch.cuda.is_available():
            if dual_target:
                target = tuple(single_target.cuda(async=True) for single_target in target)
            else:
                target = target.cuda(async=True)
        if dual_target:
            target = tuple(Variable(t, requires_grad=False) for t in target)
            for i in range(len(output)):
                print('Output shape[%d]: %s'%(i, output[i].shape))
                print('Target shape[%d]: %s'%(i, target[i].shape))
        else:
            target = Variable(target, requires_grad=False)
        score = criterion(output, target)
        """

        self.model_arch.zero_grad()
        score.backward(retain_graph=retain_graph)
        gradients = self.gradients['value']  # dS/dA
        activations = self.activations['value']  # A
        b, k, u, v = gradients.size()

        # Grad-CAM++ alpha weights (see the note after this example)
        alpha_num = gradients.pow(2)
        alpha_denom = gradients.pow(2).mul(2) + \
            activations.mul(gradients.pow(3)) \
                       .view(b, k, u * v).sum(-1, keepdim=True).view(b, k, 1, 1)
        alpha_denom = torch.where(alpha_denom != 0.0, alpha_denom,
                                  torch.ones_like(alpha_denom))

        alpha = alpha_num.div(alpha_denom + 1e-7)
        # for Y = exp(S): ReLU(dY/dA) == ReLU(exp(S) * dS/dA)
        positive_gradients = F.relu(score.exp() * gradients)
        weights = (alpha * positive_gradients).view(b, k, u * v).sum(-1)
        weights = weights.view(b, k, 1, 1)

        saliency_map = (weights * activations).sum(1, keepdim=True)
        saliency_map = F.relu(saliency_map)
        saliency_map = F.interpolate(saliency_map,
                                     size=(h, w),
                                     mode='bilinear',
                                     align_corners=False)
        saliency_map_min = saliency_map.min()
        saliency_map_max = saliency_map.max()
        # small epsilon guards against division by zero on a constant map
        saliency_map = (saliency_map - saliency_map_min).div(
            saliency_map_max - saliency_map_min + 1e-8).data

        return saliency_map, score
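
The alpha computation in this forward pass follows the Grad-CAM++ weighting.
Writing the model score as Y = exp(S), so that dY/dA = exp(S) * dS/dA, and
neglecting second derivatives of S, the exp(S) factors cancel in the ratio,
which is why alpha_num and alpha_denom only need first-order gradients of S:

\[
\alpha^{k}_{ij} =
  \frac{\bigl(\partial S / \partial A^{k}_{ij}\bigr)^{2}}
       {2\bigl(\partial S / \partial A^{k}_{ij}\bigr)^{2}
        + \sum_{a,b} A^{k}_{ab}\,\bigl(\partial S / \partial A^{k}_{ab}\bigr)^{3}},
\qquad
w_{k} = \sum_{i,j} \alpha^{k}_{ij}\,
        \mathrm{ReLU}\!\Bigl(\frac{\partial Y}{\partial A^{k}_{ij}}\Bigr).
\]

The saliency map is then ReLU(sum_k w_k * A^k), bilinearly upsampled to the
input size and min-max normalized, exactly as the code above does.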