Example #1
# Imports assumed for all three examples below. `grid_positions`,
# `upscale_positions`, `downscale_positions`, `warp`, `imshow_image`,
# `savefig`, `EmptyTensorError`, and `NoGradientError` are helpers from the
# surrounding project (D2-Net-style lib.utils / lib.exceptions), not
# standard-library names; in particular, `savefig` is the project's own
# wrapper, not matplotlib's.
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
def drawTraining(image1,
                 image2,
                 pos1,
                 pos2,
                 batch,
                 idx_in_batch,
                 output,
                 save=False):
    pos1_aux = pos1.cpu().numpy()
    pos2_aux = pos2.cpu().numpy()

    k = pos1_aux.shape[1]
    col = np.random.rand(k, 3)
    n_sp = 4
    plt.figure()
    plt.subplot(1, n_sp, 1)
    im1 = imshow_image(image1[0].cpu().numpy(),
                       preprocessing=batch['preprocessing'])
    plt.imshow(im1)
    plt.scatter(pos1_aux[1, :],
                pos1_aux[0, :],
                s=0.25**2,
                c=col,
                marker=',',
                alpha=0.5)
    plt.axis('off')
    plt.subplot(1, n_sp, 2)
    plt.imshow(output['scores1'][idx_in_batch].data.cpu().numpy(), cmap='Reds')
    plt.axis('off')
    plt.subplot(1, n_sp, 3)
    im2 = imshow_image(image2[0].cpu().numpy(),
                       preprocessing=batch['preprocessing'])
    plt.imshow(im2)
    plt.scatter(pos2_aux[1, :],
                pos2_aux[0, :],
                s=0.25**2,
                c=col,
                marker=',',
                alpha=0.5)
    plt.axis('off')
    plt.subplot(1, n_sp, 4)
    plt.imshow(output['scores2'][idx_in_batch].data.cpu().numpy(), cmap='Reds')
    plt.axis('off')

    if save:
        savefig('train_vis/%s.%02d.%02d.%d.png' %
                ('train' if batch['train'] else 'valid', batch['epoch_idx'],
                 batch['batch_idx'] // batch['log_interval'], idx_in_batch),
                dpi=300)
    else:
        plt.show()

    plt.close()

    # Convert the matplotlib (RGB) images to BGR for OpenCV drawing and saving;
    # cv2.COLOR_RGB2BGR is the intended flag (numerically identical to BGR2RGB).
    im1 = cv2.cvtColor(im1, cv2.COLOR_RGB2BGR)
    im2 = cv2.cvtColor(im2, cv2.COLOR_RGB2BGR)

    # Draw every 5th keypoint; OpenCV expects integer (x, y) pixel coordinates.
    for i in range(0, pos1_aux.shape[1], 5):
        im1 = cv2.circle(im1, (int(pos1_aux[1, i]), int(pos1_aux[0, i])), 1,
                         (0, 0, 255), 2)
    for i in range(0, pos2_aux.shape[1], 5):
        im2 = cv2.circle(im2, (int(pos2_aux[1, i]), int(pos2_aux[0, i])), 1,
                         (0, 0, 255), 2)

    im3 = cv2.hconcat([im1, im2])

    for i in range(0, pos1_aux.shape[1], 5):
        im3 = cv2.line(
            im3, (int(pos1_aux[1, i]), int(pos1_aux[0, i])),
            (int(pos2_aux[1, i]) + im1.shape[1], int(pos2_aux[0, i])),
            (0, 255, 0), 1)

    if save:
        cv2.imwrite(
            'train_vis/%s.%02d.%02d.%d.png' %
            ('train_corr' if batch['train'] else 'valid', batch['epoch_idx'],
             batch['batch_idx'] // batch['log_interval'], idx_in_batch), im3)
    else:
        cv2.imshow('Image', im3)
        cv2.waitKey(0)
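The second half of `drawTraining` uses a standard OpenCV correspondence-visualization pattern: concatenate the two images side by side with `cv2.hconcat`, then draw a line from each left-image keypoint to its match, shifting the match's x coordinate by the left image's width. A minimal self-contained sketch of that pattern on synthetic data (all names and values below are illustrative, not taken from the example above):

import cv2
import numpy as np

h, w = 256, 256
img_a = np.full((h, w, 3), 40, dtype=np.uint8)   # dummy left image (BGR)
img_b = np.full((h, w, 3), 80, dtype=np.uint8)   # dummy right image (BGR)
pts_a = np.random.randint(0, h, size=(2, 20))    # row 0: y, row 1: x
pts_b = np.clip(pts_a + np.random.randint(-5, 6, size=(2, 20)), 0, h - 1)

canvas = cv2.hconcat([img_a, img_b])
for i in range(pts_a.shape[1]):
    pa = (int(pts_a[1, i]), int(pts_a[0, i]))        # (x, y) in the left image
    pb = (int(pts_b[1, i]) + w, int(pts_b[0, i]))    # shift x by the left width
    cv2.circle(canvas, pa, 1, (0, 0, 255), 2)        # red endpoints
    cv2.circle(canvas, pb, 1, (0, 0, 255), 2)
    cv2.line(canvas, pa, pb, (0, 255, 0), 1)         # green match line
cv2.imwrite('correspondences.png', canvas)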
Example #2
def loss_function(model,
                  batch,
                  device,
                  margin=1,
                  safe_radius=4,
                  scaling_steps=3):
    output = model({
        'image1': batch['image1'].to(device),
        'image2': batch['image2'].to(device)
    })

    loss = torch.tensor(np.array([0], dtype=np.float32), device=device)
    has_grad = False

    n_valid_samples = 0
    for idx_in_batch in range(batch['image1'].size(0)):
        # Annotations
        depth1 = batch['depth1'][idx_in_batch, :, :].to(device)  # [h1, w1]
        intrinsics1 = batch['intrinsics1'][idx_in_batch, :].to(device)  # [9]
        pose1 = batch['pose1'][idx_in_batch, :].view(4, 4).to(device)  # [4, 4]
        bbox1 = batch['bbox1'][idx_in_batch, :].to(device)  # [2]

        depth2 = batch['depth2'][idx_in_batch, :, :].to(device)
        intrinsics2 = batch['intrinsics2'][idx_in_batch, :].to(device)
        pose2 = batch['pose2'][idx_in_batch, :].view(4, 4).to(device)
        bbox2 = batch['bbox2'][idx_in_batch, :].to(device)

        # Network output
        dense_features1 = output['dense_features1'][idx_in_batch, :, :, :]
        c, h1, w1 = dense_features1.size()
        scores1 = output['scores1'][idx_in_batch, :, :].view(-1)

        dense_features2 = output['dense_features2'][idx_in_batch, :, :, :]
        _, h2, w2 = dense_features2.size()
        scores2 = output['scores2'][idx_in_batch, :, :]

        all_descriptors1 = F.normalize(dense_features1.view(c, -1), dim=0)
        descriptors1 = all_descriptors1

        all_descriptors2 = F.normalize(dense_features2.view(c, -1), dim=0)

        # Warp the positions from image 1 to image 2
        fmap_pos1 = grid_positions(h1, w1, device)
        pos1 = upscale_positions(fmap_pos1, scaling_steps=scaling_steps)
        try:
            pos1, pos2, ids = warp(pos1, depth1, intrinsics1, pose1, bbox1,
                                   depth2, intrinsics2, pose2, bbox2)  # [2, _]
        except EmptyTensorError:
            continue
        fmap_pos1 = fmap_pos1[:, ids]
        descriptors1 = descriptors1[:, ids]
        scores1 = scores1[ids]

        # Skip the pair if not enough GT correspondences are available
        if ids.size(0) < 128:
            continue

        # Descriptors at the corresponding positions
        fmap_pos2 = torch.round(
            downscale_positions(pos2, scaling_steps=scaling_steps)).long()
        descriptors2 = F.normalize(dense_features2[:, fmap_pos2[0, :],
                                                   fmap_pos2[1, :]],
                                   dim=0)
        # Squared distance between paired unit descriptors: 2 - 2 * cosine
        positive_distance = 2 - 2 * (descriptors1.t().unsqueeze(1)
                                     @ descriptors2.t().unsqueeze(2)).squeeze()

        all_fmap_pos2 = grid_positions(h2, w2, device)
        # Chebyshev distance from each match to every feature-map cell
        position_distance = torch.max(
            torch.abs(fmap_pos2.unsqueeze(2).float() -
                      all_fmap_pos2.unsqueeze(1)),
            dim=0)[0]
        is_out_of_safe_radius = position_distance > safe_radius
        distance_matrix = 2 - 2 * (descriptors1.t() @ all_descriptors2)
        negative_distance2 = torch.min(
            distance_matrix + (1 - is_out_of_safe_radius.float()) * 10.,
            dim=1)[0]

        all_fmap_pos1 = grid_positions(h1, w1, device)
        position_distance = torch.max(
            torch.abs(fmap_pos1.unsqueeze(2).float() -
                      all_fmap_pos1.unsqueeze(1)),
            dim=0)[0]
        is_out_of_safe_radius = position_distance > safe_radius
        distance_matrix = 2 - 2 * (descriptors2.t() @ all_descriptors1)
        negative_distance1 = torch.min(
            distance_matrix + (1 - is_out_of_safe_radius.float()) * 10.,
            dim=1)[0]

        diff = positive_distance - torch.min(negative_distance1,
                                             negative_distance2)

        scores2 = scores2[fmap_pos2[0, :], fmap_pos2[1, :]]

        # Score-weighted hard triplet margin term for this image pair
        loss = loss + (
            torch.sum(scores1 * scores2 * F.relu(margin + diff)) /
            torch.sum(scores1 * scores2))
        has_grad = True
        n_valid_samples += 1

        if batch['batch_idx'] % batch['log_interval'] == 0:
            pos1_aux = pos1.cpu().numpy()
            pos2_aux = pos2.cpu().numpy()
            k = pos1_aux.shape[1]
            col = np.random.rand(k, 3)
            n_sp = 4
            plt.figure()
            plt.subplot(1, n_sp, 1)
            im1 = imshow_image(
                batch['image1'][idx_in_batch, :, :, :].cpu().numpy(),
                preprocessing=batch['preprocessing'])
            plt.imshow(im1)
            plt.scatter(pos1_aux[1, :],
                        pos1_aux[0, :],
                        s=0.25**2,
                        c=col,
                        marker=',',
                        alpha=0.5)
            plt.axis('off')
            plt.subplot(1, n_sp, 2)
            plt.imshow(
                output['scores1'][idx_in_batch, :, :].data.cpu().numpy(),
                cmap='Reds')
            plt.axis('off')
            plt.subplot(1, n_sp, 3)
            im2 = imshow_image(
                batch['image2'][idx_in_batch, :, :, :].cpu().numpy(),
                preprocessing=batch['preprocessing'])
            plt.imshow(im2)
            plt.scatter(pos2_aux[1, :],
                        pos2_aux[0, :],
                        s=0.25**2,
                        c=col,
                        marker=',',
                        alpha=0.5)
            plt.axis('off')
            plt.subplot(1, n_sp, 4)
            plt.imshow(
                output['scores2'][idx_in_batch, :, :].data.cpu().numpy(),
                cmap='Reds')
            plt.axis('off')
            savefig(
                'train_vis/%s/%02d.%02d.%d.png' %
                ('train' if batch['train'] else 'valid', batch['epoch_idx'],
                 batch['batch_idx'] // batch['log_interval'], idx_in_batch),
                dpi=300)
            plt.close()

    if not has_grad:
        raise NoGradientError

    loss = loss / n_valid_samples

    return loss
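Before Example #3 walks through the same function with detailed comments, a toy-sized sketch of its core computation helps: hardest-in-image negative mining under a safe radius, followed by the margin term. Shapes, the seed, and variable names here are illustrative assumptions (4 correspondences, 6 candidate cells, 8-D descriptors), not values from the examples:

import torch
import torch.nn.functional as F

torch.manual_seed(0)
margin, safe_radius = 1.0, 4

d1 = F.normalize(torch.randn(8, 4), dim=0)      # matched descriptors, image 1: [c, n]
d2 = F.normalize(torch.randn(8, 4), dim=0)      # matched descriptors, image 2: [c, n]
all_d2 = F.normalize(torch.randn(8, 6), dim=0)  # every image-2 cell: [c, m]

fmap_pos2 = torch.tensor([[0, 1, 2, 3], [0, 1, 2, 3]])                   # [2, n]
all_fmap_pos2 = torch.tensor([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]])   # [2, m]

# For unit descriptors, squared L2 distance = 2 - 2 * cosine similarity.
positive_distance = 2 - 2 * (d1.t().unsqueeze(1) @ d2.t().unsqueeze(2)).squeeze()  # [n]

# Chebyshev (max of |dx|, |dy|) distance from each match to every cell: [n, m].
position_distance = torch.max(
    torch.abs(fmap_pos2.unsqueeze(2).float() - all_fmap_pos2.unsqueeze(1)),
    dim=0)[0]
is_out = position_distance > safe_radius

# Hardest negative: the closest descriptor outside the safe radius; adding 10
# to in-radius candidates removes them from the minimum.
distance_matrix = 2 - 2 * (d1.t() @ all_d2)                            # [n, m]
negative_distance = torch.min(distance_matrix + (~is_out).float() * 10., dim=1)[0]

print(F.relu(margin + positive_distance - negative_distance))  # [n] margin terms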
Example #3
def loss_function(model,
                  batch,
                  device,
                  margin=1,
                  safe_radius=4,
                  scaling_steps=3,
                  plot=False):
    output = model(
        {  # ['dense_features1', 'scores1', 'dense_features2', 'scores2']
            'image1': batch['image1'].to(device),
            'image2': batch['image2'].to(device)
        })

    loss = torch.tensor(np.array([0], dtype=np.float32), device=device)
    has_grad = False

    n_valid_samples = 0
    for idx_in_batch in range(batch['image1'].size(0)):
        # Annotations
        depth1 = batch['depth1'][idx_in_batch].to(
            device)  # [h1, w1] (256, 256)
        intrinsics1 = batch['intrinsics1'][idx_in_batch].to(device)  # [3, 3]
        pose1 = batch['pose1'][idx_in_batch].view(4, 4).to(
            device)  # [4, 4] extrinsics
        bbox1 = batch['bbox1'][idx_in_batch].to(device)  # [2] top_left_corner

        depth2 = batch['depth2'][idx_in_batch].to(device)
        intrinsics2 = batch['intrinsics2'][idx_in_batch].to(device)
        pose2 = batch['pose2'][idx_in_batch].view(4, 4).to(device)
        bbox2 = batch['bbox2'][idx_in_batch].to(device)

        # Network output
        # (512, 32, 32)
        dense_features1 = output['dense_features1'][idx_in_batch]
        # c=512, h1=32, w1=32  # TODO rename c to c1
        c, h1, w1 = dense_features1.size()
        scores1 = output['scores1'][idx_in_batch].view(-1)  # 32*32=1024

        dense_features2 = output['dense_features2'][idx_in_batch]
        _, h2, w2 = dense_features2.size()  # TODO assert c2 == c1
        scores2 = output['scores2'][idx_in_batch]

        all_descriptors1 = F.normalize(dense_features1.view(c, -1),
                                       dim=0)  # (512, 32*32=1024)
        descriptors1 = all_descriptors1

        all_descriptors2 = F.normalize(dense_features2.view(c, -1), dim=0)

        # Warp the positions from image 1 to image 2
        # Build a 32x32 grid of coordinates: shape (2, 1024), from [0, 0] to [31, 31]
        fmap_pos1 = grid_positions(h1, w1, device)
        # Upscale the grid back to image resolution, since the VGG feature
        # extractor shrinks the (256, 256) image to (32, 32); the upscaling
        # loses a lot of localization precision. Shape stays [2, 1024]: the
        # 32*32 xy coordinates now range from [3.5, 3.5] to [251.5, 251.5].
        pos1 = upscale_positions(fmap_pos1, scaling_steps=scaling_steps)
        # Warp the image-1 points onto image 2, keeping only the 'good' points,
        # recorded by the indices `ids`. 'Good' means: of the 1024 points, the
        # depth in both depth maps is > 0, the warped point stays inside the
        # image, and the estimated and sampled depths agree within a 0.05
        # tolerance.
        try:
            pos1, pos2, ids = warp(  # e.g. the count drops from 1024 to 173
                pos1, depth1, intrinsics1, pose1, bbox1, depth2, intrinsics2,
                pose2, bbox2)
        except EmptyTensorError:
            continue
        fmap_pos1 = fmap_pos1[:, ids]  # keep the matching grid coordinates
        descriptors1 = descriptors1[:, ids]  # keep the matching descriptors
        scores1 = scores1[ids]  # keep the matching detection scores

        # Skip the pair if not enough GT correspondences are available
        if ids.size(0) < 128:
            continue

        # Descriptors at the corresponding positions
        # Downscale pos2 (warped over from pos1) back to the (32, 32)
        # feature-map scale and round to the nearest integer cell
        fmap_pos2 = torch.round(
            downscale_positions(pos2, scaling_steps=scaling_steps)).long()
        # Gather the descriptors at fmap_pos2
        descriptors2 = F.normalize(
            dense_features2[:, fmap_pos2[0, :], fmap_pos2[1, :]], dim=0)
        # Distance between the paired descriptors; the range here is [0, 2],
        # and smaller means more similar.
        # (173, 1, 512) @ (173, 512, 1) => (173, 1, 1) => squeeze => (173)
        positive_distance = 2 - 2 * (descriptors1.t().unsqueeze(1)
                                     @ descriptors2.t().unsqueeze(2)).squeeze()

        all_fmap_pos2 = grid_positions(h2, w2, device)  # the 32x32 grid again
        # (173, 1024): distance from each fmap_pos2 to every grid cell, taking
        # the larger of the x and y pixel distances (Chebyshev distance)
        position_distance = torch.max(
            torch.abs(fmap_pos2.unsqueeze(2).float() -
                      all_fmap_pos2.unsqueeze(1)),
            dim=0)[0]
        # safe_radius=4: anything beyond 4 pixels no longer counts as a
        # neighbour of the match
        is_out_of_safe_radius = position_distance > safe_radius
        # (173, 1024): descriptor distance between each image-1 point and every
        # image-2 location
        distance_matrix = 2 - 2 * (descriptors1.t() @ all_descriptors2)
        # Mask out the neighbours (within 4 pixels, inclusive) by adding a
        # large constant, then take the hardest negative sample for each
        # descriptor 1
        negative_distance2 = torch.min(
            distance_matrix + (1 - is_out_of_safe_radius.float()) * 10.,
            dim=1)[0]  # (173) hardest negative distances
        # Repeat the computation, now against image 1, to find the hardest
        # negative sample for each descriptor 2
        all_fmap_pos1 = grid_positions(h1, w1, device)
        position_distance = torch.max(
            torch.abs(fmap_pos1.unsqueeze(2).float() -
                      all_fmap_pos1.unsqueeze(1)),
            dim=0)[0]
        is_out_of_safe_radius = position_distance > safe_radius
        distance_matrix = 2 - 2 * (descriptors2.t() @ all_descriptors1)
        negative_distance1 = torch.min(
            distance_matrix + (1 - is_out_of_safe_radius.float()) * 10.,
            dim=1)[0]
        # Hard triplet margin: positive distance minus the hardest (smallest)
        # of the two negative distances
        diff = positive_distance - torch.min(negative_distance1,
                                             negative_distance2)

        scores2 = scores2[fmap_pos2[0, :],
                          fmap_pos2[1, :]]  # (173): image-2 scores at the matches

        # F.relu plays the role of max(0, .) in the hard margin loss; the score
        # weighting pushes scores1 * scores2 to be large exactly where
        # F.relu(margin + diff) is small
        loss = loss + (
            torch.sum(scores1 * scores2 * F.relu(margin + diff)) /
            torch.sum(scores1 * scores2))

        has_grad = True  # reaching this point means the sample survived every check
        n_valid_samples += 1

        if plot and batch['batch_idx'] % batch['log_interval'] == 0:
            pos1_aux = pos1.cpu().numpy()
            pos2_aux = pos2.cpu().numpy()
            k = pos1_aux.shape[1]
            col = np.random.rand(k, 3)
            n_sp = 4
            plt.figure()
            plt.subplot(1, n_sp, 1)
            im1 = imshow_image(batch['image1'][idx_in_batch].cpu().numpy(),
                               preprocessing=batch['preprocessing'])
            plt.imshow(im1)
            plt.scatter(pos1_aux[1, :],
                        pos1_aux[0, :],
                        s=0.25**2,
                        c=col,
                        marker=',',
                        alpha=0.5)
            plt.axis('off')

            plt.subplot(1, n_sp, 2)
            plt.imshow(output['scores1'][idx_in_batch].data.cpu().numpy(),
                       cmap='Reds')
            plt.axis('off')

            plt.subplot(1, n_sp, 3)
            im2 = imshow_image(batch['image2'][idx_in_batch].cpu().numpy(),
                               preprocessing=batch['preprocessing'])
            plt.imshow(im2)
            plt.scatter(pos2_aux[1, :],
                        pos2_aux[0, :],
                        s=0.25**2,
                        c=col,
                        marker=',',
                        alpha=0.5)
            plt.axis('off')

            plt.subplot(1, n_sp, 4)
            plt.imshow(output['scores2'][idx_in_batch].data.cpu().numpy(),
                       cmap='Reds')
            plt.axis('off')

            savefig(
                'train_vis/%s.%02d.%02d.%d.png' %
                ('train' if batch['train'] else 'valid', batch['epoch_idx'],
                 batch['batch_idx'] // batch['log_interval'], idx_in_batch),
                dpi=300)
            plt.close()

    if not has_grad:
        raise NoGradientError

    loss = loss / n_valid_samples

    return loss
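For context, a hypothetical call site: the loss is driven by a conventional training loop that sets the bookkeeping keys (`train`, `epoch_idx`, `batch_idx`, `log_interval`) the functions above read from `batch`. The `model`, `train_loader`, and hyperparameter values below are assumptions for illustration, not part of the snippets:

# Hypothetical training loop around loss_function (Example #3); reuses the
# imports listed at the top of this page.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch_idx in range(10):
    for batch_idx, batch in enumerate(train_loader):
        batch.update({'train': True, 'epoch_idx': epoch_idx,
                      'batch_idx': batch_idx, 'log_interval': 100})
        optimizer.zero_grad()
        try:
            loss = loss_function(model, batch, device, plot=False)
        except NoGradientError:
            continue  # no pair in the batch had >= 128 valid correspondences
        loss.backward()
        optimizer.step()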