Example 1
    def forward_step(self, input_var, hidden, encoder_outputs, encoder_mask):
        batch_size = input_var.size(0)
        dec_len = input_var.size(1)
        embedded = self.embedding(input_var)
        embedded = self.input_dropout(embedded)

        output, hidden = self.lstm(embedded, hidden)

        attn = None
        p_copy = None
        if self.use_attention:
            # output ~ [ht, attn_ctx]
            output, attn = self.attention(output, encoder_outputs,
                                          encoder_mask)
        if self.use_copy:
            # Copy gate: p_copy = sigmoid(W_copy [output; embedded]), one value per step
            copy_input = torch.cat((output, embedded), dim=2)
            p_copy = torch.sigmoid(
                self.copy(copy_input.view(batch_size * dec_len, -1)))
            p_copy = p_copy.squeeze(1).view(batch_size, dec_len)

        predicted_softmax = stable_softmax(
            self.out(output.contiguous().view(-1, self.hidden_dim))).view(
                batch_size, dec_len, -1)
        return predicted_softmax, hidden, attn, p_copy
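The step returns the generation softmax, the attention weights, and the copy gate separately, leaving the final output distribution to the caller. A minimal pointer-generator-style sketch of that mixing, written as an assumption: mix_copy_distribution and src_token_ids are hypothetical names, not part of the original code.

import torch

def mix_copy_distribution(predicted_softmax, attn, p_copy, src_token_ids):
    # predicted_softmax: [batch, dec_len, vocab]   generation distribution
    # attn:              [batch, dec_len, src_len] attention over source tokens
    # p_copy:            [batch, dec_len]          probability of copying
    # src_token_ids:     [batch, src_len]          vocab ids of source tokens (hypothetical)
    gen = (1.0 - p_copy).unsqueeze(-1) * predicted_softmax
    copied = torch.zeros_like(predicted_softmax)
    index = src_token_ids.unsqueeze(1).expand_as(attn).contiguous()
    # Route each attention weight to the vocab slot of the source token it covers
    copied.scatter_add_(2, index, p_copy.unsqueeze(-1) * attn)
    return gen + copied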
Example 2
    def forward(self, x):
        # Hidden layer pre-activation as a column vector: z1 = W1 x + b1
        self.z1 = np.array([self.W1.dot(x)]).transpose() + self.b1
        vec_rectified_linear_unit = np.vectorize(rectified_linear_unit)
        self.h = vec_rectified_linear_unit(self.z1)
        # Flatten the column vector back to 1-D for the next matrix product
        self.h = self.h.transpose()[0]
        # Output layer: z2 = W2 h + b2, normalized with a numerically stable softmax
        self.z2 = np.array([self.W2.dot(self.h)]).transpose() + self.b2
        self.y_hat = stable_softmax(self.z2)
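Neither rectified_linear_unit nor stable_softmax is defined in the snippet. A minimal NumPy sketch of what they are assumed to do; subtracting the maximum is the standard trick that avoids overflow in exp() without changing the result, since softmax is invariant to adding a constant to every logit.

import numpy as np

def rectified_linear_unit(z):
    # Scalar ReLU, matching the np.vectorize usage above (assumed definition)
    return max(0.0, z)

def stable_softmax(z):
    # Shift by the max so the largest exponent is exp(0) = 1
    shifted = z - np.max(z)
    exps = np.exp(shifted)
    return exps / np.sum(exps)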
Example 3
    def forward(self, output, context, mask):
        # refer to OpenNMT-py
        # https://github.com/OpenNMT/OpenNMT-py/blob/master/onmt/modules/global_attention.py#L121

        dim = self.dim
        src_batch, src_len, src_dim = context.size()
        tgt_batch, tgt_len, tgt_dim = output.size()
        assert src_batch == tgt_batch
        assert src_dim == tgt_dim
        assert src_dim == self.dim

        def check_nan(name, tensor):
            # Debugging aid: abort as soon as a NaN shows up in the pipeline
            if torch.isnan(tensor).any():
                print('nan found in %s:' % name, tensor)
                sys.exit()

        check_nan('output', output)
        check_nan('context', context)

        wq = self.linear_query(output.contiguous().view(-1, dim))
        wq = wq.view(tgt_batch, tgt_len, 1, dim)
        wq = wq.expand(tgt_batch, tgt_len, src_len, dim)

        check_nan('wq', wq)

        uh = self.linear_context(context.contiguous().view(-1, dim))
        uh = uh.view(src_batch, 1, src_len, dim)
        uh = uh.expand(src_batch, tgt_len, src_len, dim)

        check_nan('uh', uh)

        wquh = torch.tanh(wq + uh)

        check_nan('wquh', wquh)

        score = self.v(wquh.view(-1, dim)).view(tgt_batch, tgt_len, src_len)
        check_nan('score', score)
        # mask: [batch_size, src_len] -> [batch_size, tgt_len, src_len]
        mask = mask.unsqueeze(1).expand_as(score)
        # stable_softmax applies the mask and the max-subtraction internally;
        # it replaces an earlier inline masked-softmax implementation,
        # reconstructed in the sketch after this example.
        attn = stable_softmax(score, mask)

        # (batch, out_len, in_len) * (batch, in_len, dim) -> (batch, out_len, dim)
        mix = torch.bmm(attn, context)

        # concat -> (batch, out_len, 2*dim)
        combined = torch.cat((mix, output), dim=2)
        # output -> (batch, out_len, dim)
        output = self.linear_out(combined.view(-1, 2 * dim)).view(
            tgt_batch, -1, dim)
        # output ~ [ht, attn_ctx]

        return output, attn
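The commented-out block that stable_softmax replaced spells out the computation: subtract the row max, exponentiate, zero out masked source positions, and renormalize. A minimal reconstruction of it, under the assumption that the project's stable_softmax has this (score, mask) signature:

import torch

def stable_softmax(score, mask=None):
    # score: [batch, tgt_len, src_len]; mask: broadcastable to score, nonzero
    # where the source position is padding and must receive zero attention.
    max_by_row = torch.max(score, dim=-1, keepdim=True)[0]
    attn = torch.exp(score - max_by_row)  # max-subtraction avoids overflow
    if mask is not None:
        attn = attn * (1.0 - mask.float())
    return attn / torch.sum(attn, dim=-1, keepdim=True)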
Example 4
def detect_video(model, dataset, video_path):
    """Experimental."""

    import cv2

    # Video capture
    vcapture = cv2.VideoCapture(video_path)
    width = int(vcapture.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vcapture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = vcapture.get(cv2.CAP_PROP_FPS)

    # Camera projection matrix (overrides the capture dimensions read above)
    width = dataset.camera.width / 2  # TODO: work on original image size not 1/2
    height = dataset.camera.height / 2
    fx = width / (2 * np.tan(dataset.camera.fov_x / 2))
    fy = -height / (2 * np.tan(dataset.camera.fov_y / 2))  # negative fy: flipped vertical axis
    K = np.matrix([[fx, 0, width / 2], [0, fy, height / 2], [0, 0, 1]])

    R_cam_unreal = np.matrix([[0, 1, 0], [0, 0, 1], [1, 0, 0]])

    # Define codec and create video writer
    vwriter = cv2.VideoWriter("video_real.avi",
                              cv2.VideoWriter_fourcc(*'MJPG'), fps,
                              (int(width), int(height)))

    count = 0
    pose_est_acc = []
    success = True
    while success:
        print("frame: ", count)
        count += 1
        # Read next image
        success, image = vcapture.read()
        if success and count > 16900:  # hard-coded frame range of interest
            # OpenCV returns images as BGR, convert to RGB
            image = image[..., ::-1]
            image = image[:, 1:-150, :]  # crop
            image = np.pad(image, [(400, 400), (400, 400), (0, 0)],
                           mode='constant',
                           constant_values=0)
            # Grayscale via luminosity weights, replicated on all three channels
            gray = 0.21 * image[:, :, 0] + 0.72 * image[:, :, 1] + 0.07 * image[:, :, 2]
            image[:, :, 0] = gray
            image[:, :, 1] = gray
            image[:, :, 2] = gray

            # Resize to network input shape
            molded_image, window, scale, padding, crop = utils.resize_image(
                image,
                min_dim=model.config.IMAGE_MIN_DIM,
                min_scale=model.config.IMAGE_MIN_SCALE,
                max_dim=model.config.IMAGE_MAX_DIM,
                mode=model.config.IMAGE_RESIZE_MODE)

            # Detect objects
            results = model.detect([image], verbose=0)[0]

            loc_est = results['loc']

            ori_pmf = utils.stable_softmax(results['ori'])
            q_est, q_est_cov = se3lib.quat_weighted_avg(
                dataset.ori_histogram_map, ori_pmf)

            z = loc_est[2]
            x = loc_est[0]
            y = loc_est[1]
            print(str(z) + " " + str(x) + " " + str(y))

            # Recover Unreal orientation: R_wo
            R_co = se3lib.quat2SO3(q_est)
            R_co = R_cam_unreal.T * R_co
            R_wc = se3lib.euler2SO3_unreal(0, 0, 0)
            R_wo = R_wc * R_co
            roll, pitch, yaw = se3lib.SO32euler(R_wo)
            #
            print(str(-pitch) + " " + str(yaw) + " " + str(-roll))

            # Stack frame gt
            pose_est = np.array(
                [loc_est[2], loc_est[0], loc_est[1], -pitch, yaw, -roll])
            pose_est_acc.append(pose_est)

            # Crop and resize image to match original input size
            margin = (model.config.IMAGE_MAX_DIM - 480) // 2
            image = molded_image[margin:model.config.IMAGE_MAX_DIM -
                                 margin, :, :]

            # Show image
            #fig, ax_1 = plt.subplots(1, 1, figsize=(12, 8))

            utils.plot_axes(image, q_est, loc_est, K, 5.0)
            # ax_1.imshow(image)
            # ax_1.set_xticks([])
            # ax_1.set_yticks([])

            nr_bins_per_dim = model.config.ORI_BINS_PER_DIM
            utils.visualize_weights(ori_pmf, ori_pmf, nr_bins_per_dim)

            # plt.show(block=True)
            # Add image to video writer
            vwriter.write(image)

        if count > 17200:
            success = False

    vwriter.release()
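The intrinsic matrix K built above follows the standard pinhole relation between focal length and field of view. A stand-alone sketch of that relation (the negative fy in the original encodes a flipped vertical axis and is omitted here):

import numpy as np

def intrinsics_from_fov(width, height, fov_x, fov_y):
    # A point at the image border lies width/2 pixels from the principal point
    # and subtends half the horizontal FOV, hence fx = (width/2) / tan(fov_x/2).
    fx = width / (2 * np.tan(fov_x / 2))
    fy = height / (2 * np.tan(fov_y / 2))
    return np.array([[fx, 0, width / 2],
                     [0, fy, height / 2],
                     [0, 0, 1]])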
Example 5
def detect_dataset(model, dataset, nr_images):
    """ Tests model on N random images of the dataset
     and shows the results.
    """

    # Variance used only by the commented-out multimodal (EM) orientation estimation below
    delta = model.config.BETA / model.config.ORI_BINS_PER_DIM
    var = delta**2 / 12

    for i in range(nr_images):
        image_id = random.choice(dataset.image_ids)

        # Load pose in all formats
        loc_gt = dataset.load_location(image_id)
        q_gt = dataset.load_quaternion(image_id)
        I, I_meta, loc_encoded_gt, ori_encoded_gt = \
            net.load_image_gt(dataset, model.config, image_id)
        image_ori = dataset.load_image(image_id)

        info = dataset.image_info[image_id]

        # Run detection
        results = model.detect([image_ori], verbose=1)

        # Retrieve location
        if model.config.REGRESS_LOC:
            loc_est = results[0]['loc']
        else:
            loc_pmf = utils.stable_softmax(results[0]['loc'])

            # Compute location mean according to first moment
            loc_est = np.asmatrix(loc_pmf) * np.asmatrix(
                dataset.histogram_3D_map)

            # Compute loc encoding error
            loc_encoded_gt = np.asmatrix(loc_encoded_gt) * np.asmatrix(
                dataset.histogram_3D_map)
            loc_encoded_err = np.linalg.norm(loc_encoded_gt - loc_gt)

        # Retrieve orientation
        if model.config.REGRESS_ORI:

            if model.config.ORIENTATION_PARAM == 'quaternion':
                q_est = results[0]['ori']
            elif model.config.ORIENTATION_PARAM == 'euler_angles':
                q_est = se3lib.SO32quat(
                    se3lib.euler2SO3_left(results[0]['ori'][0],
                                          results[0]['ori'][1],
                                          results[0]['ori'][2]))
            elif model.config.ORIENTATION_PARAM == 'angle_axis':
                theta = np.linalg.norm(results[0]['ori'])
                if theta < 1e-6:
                    v = [0, 0, 0]
                else:
                    v = results[0]['ori'] / theta
                q_est = se3lib.angleaxis2quat(v, theta)
        else:
            ori_pmf = utils.stable_softmax(results[0]['ori'])

            # Compute mean quaternion
            q_est, q_est_cov = se3lib.quat_weighted_avg(
                dataset.ori_histogram_map, ori_pmf)

            # Multimodal estimation
            # Uncomment this block to try the EM framework
            # nr_EM_iterations = 5
            # Q_mean, Q_var, Q_priors, model_scores = fit_GMM_to_orientation(dataset.ori_histogram_map, ori_pmf, nr_EM_iterations, var)
            # print('Multimodal errors',2 * np.arccos(np.abs(np.asmatrix(Q_mean) * np.asmatrix(q_gt).transpose())) * 180 / np.pi)
            #
            # q_est_1 = Q_mean[0, :]
            # q_est_2 = Q_mean[1, :]
            # utils.polar_plot(q_est_1, q_est_2)

        # Compute Errors
        angular_err = 2 * np.arccos(
            np.abs(np.asmatrix(q_est) *
                   np.asmatrix(q_gt).transpose())) * 180 / np.pi
        loc_err = np.linalg.norm(loc_est - loc_gt)

        print('GT location: ', loc_gt)
        print('Est location: ', loc_est)
        print('Processed Image:', info['path'])
        print('Est orientation: ', q_est)
        print('GT_orientation: ', q_gt)

        print('Location error: ', loc_err)
        print('Angular error: ', angular_err)

        # Visualize PMFs
        if not model.config.REGRESS_ORI:

            nr_bins_per_dim = model.config.ORI_BINS_PER_DIM
            utils.visualize_weights(ori_encoded_gt, ori_pmf, nr_bins_per_dim)

        # Show image
        fig, (ax_1, ax_2) = plt.subplots(1, 2, figsize=(12, 8))
        ax_1.imshow(image_ori)
        ax_1.set_xticks([])
        ax_1.set_yticks([])
        ax_2.imshow(image_ori)
        ax_2.set_xticks([])
        ax_2.set_yticks([])

        height_ori = np.shape(image_ori)[0]
        width_ori = np.shape(image_ori)[1]

        # Recover focal lengths
        fx = dataset.camera.fx
        fy = dataset.camera.fy

        K = np.matrix([[fx, 0, width_ori / 2], [0, fy, height_ori / 2],
                       [0, 0, 1]])

        # Speed labels express q_obj_cam whereas
        # Urso labels express q_cam_obj
        if dataset.name == 'Speed':
            q_est = se3lib.quat_inv(q_est)
            q_gt = se3lib.quat_inv(q_gt)

        utils.visualize_axes(ax_1, q_gt, loc_gt, K, 100)
        utils.visualize_axes(ax_2, q_est, loc_est, K, 100)

        utils.polar_plot(q_gt, q_est)

        # Location overlap visualization
        fig, ax = plt.subplots()
        ax.imshow(image_ori)

        # Project 3D coords for visualization
        x_est = loc_est[0] / loc_est[2]
        y_est = loc_est[1] / loc_est[2]

        x_gt = loc_gt[0] / loc_gt[2]
        y_gt = loc_gt[1] / loc_gt[2]

        if not model.config.REGRESS_LOC:
            x_decoded_gt = loc_encoded_gt[0, 0] / loc_encoded_gt[0, 2]
            y_decoded_gt = loc_encoded_gt[0, 1] / loc_encoded_gt[0, 2]

            circ = Circle((x_decoded_gt * fx + width_ori / 2,
                           height_ori / 2 + y_decoded_gt * fy),
                          7,
                          facecolor='b',
                          label='encoded')
            ax.add_patch(circ)

        # Plot locations
        circ_gt = Circle(
            (x_gt * fx + width_ori / 2, height_ori / 2 + y_gt * fy),
            15,
            facecolor='r',
            label='gt')
        ax.add_patch(circ_gt)

        circ = Circle(
            (x_est * fx + width_ori / 2, height_ori / 2 + y_est * fy),
            10,
            facecolor='g',
            label='pred')
        ax.add_patch(circ)

        ax.legend(loc='upper right', shadow=True, fontsize='x-small')
        plt.show()
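The expression 2*arccos(|<q_est, q_gt>|) used here and in the following examples is the geodesic distance between unit quaternions. A small helper form of it; the clamp before arccos is an added safeguard against rounding pushing the dot product above 1, not in the original:

import numpy as np

def angular_error_deg(q_est, q_gt):
    # Geodesic distance between unit quaternions, in degrees; the absolute
    # value handles the double cover (q and -q encode the same rotation).
    dot = abs(float(np.dot(np.ravel(q_est), np.ravel(q_gt))))
    return 2 * np.arccos(min(dot, 1.0)) * 180 / np.pi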
Example 6
def evaluate(model, dataset):
    """ Evaluates model on all dataset images. Assumes all images have corresponding pose labels.
    """

    loc_err_acc = []
    loc_encoded_err_acc = []
    ori_err_acc = []
    ori_encoded_err_acc = []
    distances_acc = []
    esa_scores_acc = []

    # Variance used only by the commented-out multimodal (EM) orientation estimation below
    delta = model.config.BETA / model.config.ORI_BINS_PER_DIM
    var = delta**2 / 12

    for image_id in dataset.image_ids:

        print('Image ID:', image_id)

        # Load pose in all formats
        loc_gt = dataset.load_location(image_id)
        q_gt = dataset.load_quaternion(image_id)
        image = dataset.load_image(image_id)

        results = model.detect([image], verbose=1)

        if model.config.REGRESS_KEYPOINTS:
            # Experimental

            I, I_meta, loc_gt, k1_gt, k2_gt = \
                net.load_image_gt(dataset, model.config, image_id)

            loc_est = results[0]['loc']
            k1_est = results[0]['k1']
            k2_est = results[0]['k2']

            # Prepare keypoint matches
            # TODO: take scale into account and get rid of magic numbers
            P1 = np.zeros((3, 3))
            P1[2, 0] = 3.0
            P1[1, 1] = 3.0

            P2 = np.zeros((3, 3))
            P2[:, 0] = k1_est
            P2[:, 1] = k2_est
            P2[:, 2] = loc_est

            t, R = se3lib.pose_3Dto3D(np.asmatrix(P1), np.asmatrix(P2))
            q_est = se3lib.SO32quat(R.T)

        else:

            I, I_meta, loc_encoded_gt, ori_encoded_gt = \
                net.load_image_gt(dataset, model.config, image_id)

            # Retrieve location
            if model.config.REGRESS_LOC:
                loc_est = results[0]['loc']
            else:
                loc_pmf = utils.stable_softmax(results[0]['loc'])

                # Compute location mean according to first moment
                loc_est = np.asmatrix(loc_pmf) * np.asmatrix(
                    dataset.histogram_3D_map)

                # Compute loc encoding error
                loc_decoded_gt = np.asmatrix(loc_encoded_gt) * np.asmatrix(
                    dataset.histogram_3D_map)
                loc_encoded_err = np.linalg.norm(loc_decoded_gt - loc_gt)
                loc_encoded_err_acc.append(loc_encoded_err)

            # Retrieve orientation
            if model.config.REGRESS_ORI:

                if model.config.ORIENTATION_PARAM == 'quaternion':
                    q_est = results[0]['ori']
                elif model.config.ORIENTATION_PARAM == 'euler_angles':
                    q_est = se3lib.SO32quat(
                        se3lib.euler2SO3_left(results[0]['ori'][0],
                                              results[0]['ori'][1],
                                              results[0]['ori'][2]))
                elif model.config.ORIENTATION_PARAM == 'angle_axis':
                    theta = np.linalg.norm(results[0]['ori'])
                    if theta < 1e-6:
                        v = [0, 0, 0]
                    else:
                        v = results[0]['ori'] / theta
                    q_est = se3lib.angleaxis2quat(v, theta)
            else:

                ori_pmf = utils.stable_softmax(results[0]['ori'])

                # Compute mean quaternion
                q_est, q_est_cov = se3lib.quat_weighted_avg(
                    dataset.ori_histogram_map, ori_pmf)

                # Multimodal estimation
                # Uncomment this block to try the EM framework
                # nr_EM_iterations = 5
                # Q_mean, Q_var, Q_priors, model_scores = fit_GMM_to_orientation(dataset.ori_histogram_map, ori_pmf,
                #                                                                nr_EM_iterations, var)
                #
                # print('Err:', angular_err)
                # angular_err = 2*np.arccos(np.abs(np.asmatrix(Q_mean)*np.asmatrix(q_gt).transpose()))*180/np.pi
                #
                # # Select best of two
                # if len(angular_err) == 1 or angular_err[0]<angular_err[1]:
                #     q_est = Q_mean[0, :]
                # else:
                #     q_est = Q_mean[1, :]
                #
                # print('Err:',angular_err)

                # Compute encoded error
                q_encoded_gt, _ = se3lib.quat_weighted_avg(
                    dataset.ori_histogram_map, ori_encoded_gt)
                ori_encoded_err = 2 * np.arccos(
                    np.abs(
                        np.asmatrix(q_encoded_gt) *
                        np.asmatrix(q_gt).transpose())) * 180 / np.pi
                ori_encoded_err_acc.append(ori_encoded_err)

        # 3. Angular error
        angular_err = 2 * np.arccos(
            np.abs(np.asmatrix(q_est) *
                   np.asmatrix(q_gt).transpose())) * 180 / np.pi
        ori_err_acc.append(angular_err.item(0))

        # 4. Loc error
        loc_err = np.linalg.norm(loc_est - loc_gt)
        loc_err_acc.append(loc_err)

        print('Loc Error: ', loc_err)
        print('Ori Error: ', angular_err)

        # Compute ESA score
        esa_score = loc_err / np.linalg.norm(loc_gt) + 2 * np.arccos(
            np.abs(np.asmatrix(q_est) * np.asmatrix(q_gt).transpose()))
        esa_scores_acc.append(esa_score)

        # Store depth
        distances_acc.append(loc_gt[2])

    print('Mean est. location error: ', np.mean(loc_err_acc))
    print('Mean est. orientation error: ', np.mean(ori_err_acc))
    print('ESA score: ', np.mean(esa_scores_acc))
    if loc_encoded_err_acc:
        print('Mean encoded location error: ', np.mean(loc_encoded_err_acc))

    # Dump results
    pd.DataFrame(np.asarray(ori_err_acc)).to_csv("ori_err.csv")
    pd.DataFrame(np.asarray(loc_err_acc)).to_csv("loc_err.csv")
    pd.DataFrame(np.asarray(distances_acc)).to_csv("dists_err.csv")
Example 7
def test_and_submit(model, dataset_virtual, dataset_real):
    """ Evaluates model on ESA challenge test-set (no labels)
    and outputs submission file in a format compatible with the ESA server (probably down by now)
    """

    # ESA API
    from submission import SubmissionWriter
    submission = SubmissionWriter()

    # TODO: Make the next 2 loops a nested loop (a possible consolidation is sketched after this example)

    # Synthetic test set
    for image_id in dataset_virtual.image_ids:

        print('Image ID:', image_id)

        image = dataset_virtual.load_image(image_id)
        info = dataset_virtual.image_info[image_id]

        results = model.detect([image], verbose=1)

        # Retrieve location
        if model.config.REGRESS_LOC:
            loc_est = results[0]['loc']
        else:
            loc_pmf = utils.stable_softmax(results[0]['loc'])

            # Compute location mean according to first moment
            loc_est = np.asmatrix(loc_pmf) * np.asmatrix(
                dataset_virtual.histogram_3D_map)

        # Retrieve orientation
        if model.config.REGRESS_ORI:

            if model.config.ORIENTATION_PARAM == 'quaternion':
                q_est = results[0]['ori']
            elif model.config.ORIENTATION_PARAM == 'euler_angles':
                q_est = se3lib.SO32quat(
                    se3lib.euler2SO3_left(results[0]['ori'][0],
                                          results[0]['ori'][1],
                                          results[0]['ori'][2]))
            elif model.config.ORIENTATION_PARAM == 'angle_axis':
                theta = np.linalg.norm(results[0]['ori'])
                if theta < 1e-6:
                    v = [0, 0, 0]
                else:
                    v = results[0]['ori'] / theta
                q_est = se3lib.angleaxis2quat(v, theta)
        else:
            ori_pmf = utils.stable_softmax(results[0]['ori'])

            # Compute mean quaternion
            q_est, q_est_cov = se3lib.quat_weighted_avg(
                dataset_virtual.ori_histogram_map, ori_pmf)

        # Reorder quaternion components, moving the scalar part to the front
        # for the submission format
        q_rect = [q_est[3], q_est[0], q_est[1], q_est[2]]

        submission.append_test(info['path'].split('/')[-1], q_rect, loc_est)

    # Real test set

    for image_id in dataset_real.image_ids:

        print('Image ID:', image_id)

        image = dataset_real.load_image(image_id)
        info = dataset_real.image_info[image_id]

        results = model.detect([image], verbose=1)

        # Retrieve location
        if model.config.REGRESS_LOC:
            loc_est = results[0]['loc']
        else:
            loc_pmf = utils.stable_softmax(results[0]['loc'])

            # Compute location mean according to first moment
            loc_est = np.asmatrix(loc_pmf) * np.asmatrix(
                dataset_real.histogram_3D_map)

        # Retrieve orientation
        if model.config.REGRESS_ORI:

            if model.config.ORIENTATION_PARAM == 'quaternion':
                q_est = results[0]['ori']
            elif model.config.ORIENTATION_PARAM == 'euler_angles':
                q_est = se3lib.SO32quat(
                    se3lib.euler2SO3_left(results[0]['ori'][0],
                                          results[0]['ori'][1],
                                          results[0]['ori'][2]))
            elif model.config.ORIENTATION_PARAM == 'angle_axis':
                theta = np.linalg.norm(results[0]['ori'])
                if theta < 1e-6:
                    v = [0, 0, 0]
                else:
                    v = results[0]['ori'] / theta
                q_est = se3lib.angleaxis2quat(v, theta)
        else:
            ori_pmf = utils.stable_softmax(results[0]['ori'])

            # Compute mean quaternion
            q_est, q_est_cov = se3lib.quat_weighted_avg(
                dataset_real.ori_histogram_map, ori_pmf)

        # Reorder quaternion components, moving the scalar part to the front
        # for the submission format
        q_rect = [q_est[3], q_est[0], q_est[1], q_est[2]]

        submission.append_real_test(info['path'].split('/')[-1], q_rect,
                                    loc_est)

    submission.export(suffix='debug')
    print('Submission exported.')
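As the TODO notes, the two loops differ only in the dataset and the SubmissionWriter method they call. A possible consolidation, assuming the shared pose-retrieval code is first factored into a helper; estimate_pose is a hypothetical name:

# Hypothetical merge of the two loops; estimate_pose() would wrap the shared
# location/orientation retrieval logic shown above.
for dataset, append in [(dataset_virtual, submission.append_test),
                        (dataset_real, submission.append_real_test)]:
    for image_id in dataset.image_ids:
        image = dataset.load_image(image_id)
        info = dataset.image_info[image_id]
        results = model.detect([image], verbose=1)
        loc_est, q_est = estimate_pose(model, dataset, results[0])
        q_rect = [q_est[3], q_est[0], q_est[1], q_est[2]]
        append(info['path'].split('/')[-1], q_rect, loc_est)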
Example 8
def evaluate_image(model, dataset, image_id):
    """ Evaluates the model on a single image and returns
    (loc_err, angular_err in radians, loc_rel_err, esa_score).
    """

    # Load pose in all formats
    loc_gt = dataset.load_location(image_id)
    q_gt = dataset.load_quaternion(image_id)
    image = dataset.load_image(image_id)
    I, I_meta, loc_encoded_gt, ori_encoded_gt = \
        net.load_image_gt(dataset, model.config, image_id)

    results = model.detect([image], verbose=1)

    # Retrieve location
    if model.config.REGRESS_LOC:
        loc_est = results[0]['loc']
    else:
        loc_pmf = utils.stable_softmax(results[0]['loc'])

        # Compute location mean according to first moment
        loc_est = np.asmatrix(loc_pmf) * np.asmatrix(dataset.histogram_3D_map)

        # Compute loc encoding error
        loc_decoded_gt = np.asmatrix(loc_encoded_gt) * np.asmatrix(
            dataset.histogram_3D_map)
        loc_encoded_err = np.linalg.norm(loc_decoded_gt - loc_gt)

    # Retrieve orientation
    if model.config.REGRESS_ORI:

        if model.config.ORIENTATION_PARAM == 'quaternion':
            q_est = results[0]['ori']
        elif model.config.ORIENTATION_PARAM == 'euler_angles':
            q_est = se3lib.SO32quat(
                se3lib.euler2SO3_left(results[0]['ori'][0],
                                      results[0]['ori'][1],
                                      results[0]['ori'][2]))
        elif model.config.ORIENTATION_PARAM == 'angle_axis':
            theta = np.linalg.norm(results[0]['ori'])
            if theta < 1e-6:
                v = [0, 0, 0]
            else:
                v = results[0]['ori'] / theta
            q_est = se3lib.angleaxis2quat(v, theta)
    else:
        ori_pmf = utils.stable_softmax(results[0]['ori'])

        # Compute mean quaternion
        q_est, q_est_cov = se3lib.quat_weighted_avg(dataset.ori_histogram_map,
                                                    ori_pmf)

        # Compute encoded error
        q_encoded_gt, _ = se3lib.quat_weighted_avg(dataset.ori_histogram_map,
                                                   ori_encoded_gt)
        ori_encoded_err = 2 * np.arccos(
            np.abs(np.asmatrix(q_encoded_gt) *
                   np.asmatrix(q_gt).transpose())) * 180 / np.pi

    # Compute errors
    angular_err = 2 * np.arccos(
        np.abs(np.asmatrix(q_est) * np.asmatrix(q_gt).transpose()))
    # angular_err_in_deg = angular_err* 180 / np.pi
    loc_err = np.linalg.norm(loc_est - loc_gt)
    loc_rel_err = loc_err / np.linalg.norm(loc_gt)

    # Compute ESA score
    esa_score = loc_rel_err + angular_err

    return loc_err, angular_err, loc_rel_err, esa_score
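A minimal usage sketch, assuming a model and dataset are already set up as in the earlier examples; the angular terms come back as 1x1 matrices, hence .item(0) as in evaluate():

import random

image_id = random.choice(dataset.image_ids)
loc_err, ang_err, loc_rel_err, esa_score = evaluate_image(model, dataset, image_id)
print('loc err:', loc_err,
      'ang err (rad):', ang_err.item(0),
      'ESA score:', esa_score.item(0))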