Example 1
def getAbsolutePoses(poses):
    """ Return absolute poses from poses snippets (relative poses) """
    poses = np.array(poses)

    for i, pose in enumerate(poses):
        pose = pose.cpu()[0]
        # insert the identity (zero) pose for the target frame in the middle of the 5-frame snippet
        pose = torch.cat(
            [pose[:5 // 2],
             torch.zeros(1, 6).float(), pose[5 // 2:]])
        inv_transform_matrices = pose_vec2mat(pose,
                                              rotation_mode='euler').double()
        # invert each [R|t] (the network output is treated as the inverse transform)
        rot_matrices = torch.inverse(inv_transform_matrices[:, :, :3])
        tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]
        transform_matrices = torch.cat([rot_matrices, tr_vectors], dim=-1)
        # re-express every pose of the snippet relative to its first frame
        first_inv_transform = inv_transform_matrices[0]
        final_poses = first_inv_transform[:, :3] @ transform_matrices
        final_poses[:, :, -1:] += first_inv_transform[:, -1:]
        poses[i] = final_poses

    # chain successive snippets through the pose of each snippet's second frame
    for i in range(1, len(poses)):
        r = poses[i - 1][1]
        poses[i] = r[:, :3] @ poses[i]
        poses[i][:, :, -1] = poses[i][:, :, -1] + r[:, -1]

    # translations of the last snippet's absolute poses
    return poses[-1][:, :, -1]
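
A quick check on the conversion above: since every pose of a snippet is re-expressed relative to the snippet's first frame, the first entry of final_poses should be (numerically) the identity. A minimal sketch, assuming final_poses is the [seq, 3, 4] double tensor computed inside the first loop:

import torch

# first frame relative to itself: rotation ~ identity, translation ~ zero
assert torch.allclose(final_poses[0, :, :3],
                      torch.eye(3, dtype=final_poses.dtype), atol=1e-6)
assert torch.allclose(final_poses[0, :, -1],
                      torch.zeros(3, dtype=final_poses.dtype), atol=1e-6)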
Example 2
def pose_vec2points(file):
    '''
        Convert all the poses into homogeneous-coordinate points
    :return:
    '''
    #origin = torch.tensor([0,0,0])
    poses_all = np.load(file)  #[b,4,6]

    frame_poses_list = [[] for i in range(poses_all.shape[0])]
    for i in range(poses_all.shape[0]):  #b
        for j in range(i, i + poses_all.shape[1]):  #0~5,1~6
            if j < poses_all.shape[0]:
                frame_poses_list[j].append(poses_all[i, j - i, :])
            else:
                break
    # remove empty values
    for i in range(2, len(frame_poses_list)):
        if i == 2:
            frame_poses_list[i].pop(0)
        elif i == 3:
            frame_poses_list[i].pop(1)
        else:
            frame_poses_list[i].pop(2)

    # sum and average to get a single list
    batch_pose_vec = None  #at last [b(96),6]
    for i in range(len(frame_poses_list)):
        nump = np.zeros(6)
        for j in range(len(frame_poses_list[i])):
            nump += np.array(frame_poses_list[i][j])
        nump /= len(frame_poses_list[i])
        if i == 0:
            batch_pose_vec = nump.reshape(1, -1)
        else:
            batch_pose_vec = np.concatenate(
                [batch_pose_vec, nump.reshape(1, -1)])

    # 6-DoF pose vectors to transformation matrices
    batch_pose_vec = torch.tensor(batch_pose_vec)
    batch_pose_mat = pose_vec2mat(batch_pose_vec)

    origin = torch.tensor([[0.], [0.], [0.], [1.]]).double()
    point = origin
    points = None  # at last [b,4] homogeneous coordinates
    for i in range(batch_pose_mat.shape[0]):
        point = batch_pose_mat[i] @ point
        point = torch.cat([point, torch.ones([1, 1]).double()])
        if i == 0:
            points = point.unsqueeze(0)
        else:
            points = torch.cat([points, point.unsqueeze(0)])
    ret_file_name = dataset_name + '_corrds.npy'
    np.save(ret_file_name, points.detach().numpy())
    return ret_file_name
Example 3
def pose2mat(pose):
    """
    param:
        pose: only one single pose
    """
    if pose.shape == (4, 4):
        return pose
    if pose.shape[0] == 6:
        from inverse_warp import pose_vec2mat
        pose_mat = pose_vec2mat(torch.tensor(
            pose[np.newaxis, ...])).squeeze(0).cpu().numpy()
        pose_mat = np.vstack([pose_mat, np.array([0, 0, 0, 1])])
    if len(pose.shape) == 2 and pose.shape[1] == 6:
        from inverse_warp import pose_vec2mat
        pose_mat = pose_vec2mat(
            torch.tensor(pose)).squeeze(0).cpu().numpy()
        pose_mat = np.vstack([pose_mat, np.array([0, 0, 0, 1])])
    if pose.shape == (3, 4):
        pose_mat = np.vstack([pose, np.array([0, 0, 0, 1])])
    return pose_mat
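
For reference, a small usage sketch of pose2mat covering each accepted input shape; the values are hypothetical and the snippet assumes numpy, torch and the repository's inverse_warp module are importable:

import numpy as np

vec6 = np.zeros(6)                                # 6-DoF vector (translation + rotation); zeros give the identity pose
mat34 = np.hstack([np.eye(3), np.zeros((3, 1))])  # [3, 4] matrix, only needs the homogeneous row
mat44 = np.eye(4)                                 # already a full homogeneous matrix

for p in (vec6, mat34, mat44):
    assert pose2mat(p).shape == (4, 4)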
Example 4
def pose2tf_mat(rotation_mode, imgs, poses):
    poses = poses.cpu()[0]
    poses = torch.cat([poses[:len(imgs) // 2], torch.zeros(1, 6).float(), poses[len(imgs) // 2:]])
    inv_transform_matrices = pose_vec2mat(poses, rotation_mode=rotation_mode).numpy().astype(np.float64)
    rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3])
    tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]
    transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1)
    # convert the transforms relative to the middle frame 1 of [0 1 2] into poses relative to frame 0: T(0->0), T(1->0), T(2->0)
    first_inv_transform = inv_transform_matrices[0]
    final_poses = first_inv_transform[:, :3] @ transform_matrices
    final_poses[:, :, -1:] += first_inv_transform[:, -1:]
    return final_poses
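
Several of these snippets pad the [3, 4] matrices returned by pose_vec2mat with a [0, 0, 0, 1] row one at a time (np.vstack). A minimal batched helper for the same operation, assuming a numpy array shaped [N, 3, 4] such as the final_poses returned above:

import numpy as np

def to_homogeneous(poses_3x4):
    # pad every [3, 4] pose with a [0, 0, 0, 1] row -> [N, 4, 4]
    n = poses_3x4.shape[0]
    bottom = np.tile(np.array([[[0., 0., 0., 1.]]]), (n, 1, 1))
    return np.concatenate([poses_3x4, bottom], axis=1)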
Example 5
    def one_scale(depth, explainability_mask):
        assert(explainability_mask is None or depth.size()[2:] == explainability_mask.size()[2:])
        tgt_img = ref_imgs[0]
        reconstruction_loss = 0
        b, _, h, w = depth.size()
        downscale = tgt_img.size(2)/h
        
        ref_img = ref_imgs[-1]
        
        tgt_img_scaled = F.interpolate(tgt_img, (h, w), mode='area')
        ref_imgs_scaled = F.interpolate(ref_img, (h, w), mode='area')
        intrinsics_scaled = torch.cat((intrinsics[:, 0:2]/downscale, intrinsics[:, 2:]), dim=1)

        
        #current_pose = pose[:, i]
        pose21 = pose[:,0]
        pose23 = pose[:,1]
        T21 = homomat(pose_vec2mat(pose21))
        
        T23 = homomat(pose_vec2mat(pose23))
        T12 = T21.inverse()
        T13 = torch.bmm(T23, T12)
        T13 = T13[:,:3]

        ref_img_warped, valid_points = inverse_warp(ref_img, depth[:,0], T13,
                                                    intrinsics_scaled,
                                                    'mat', padding_mode)
        
        diff = (tgt_img_scaled - ref_img_warped) * valid_points.unsqueeze(1).float()

        if explainability_mask is not None:
            diff = diff * explainability_mask[:,i:i+1].expand_as(diff)

        reconstruction_loss += diff.abs().mean()
        assert((reconstruction_loss == reconstruction_loss).item() == 1)  # NaN check


        return reconstruction_loss
Example 6
def main():
    args = parser.parse_args()

    weights_pose = torch.load(args.pretrained_posenet)
    pose_net = models.PoseResNet().to(device)
    pose_net.load_state_dict(weights_pose['state_dict'], strict=False)
    pose_net.eval()

    image_dir = Path(args.dataset_dir + args.sequence + "/image_2/")
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    test_files = sum(
        [image_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])
    test_files.sort()

    print('{} files to test'.format(len(test_files)))
    print(test_files)

    global_pose = np.eye(4)
    poses = [global_pose[0:3, :].reshape(1, 12)]

    n = len(test_files)
    tensor_img1 = load_tensor_image(test_files[0], args)

    for iter in tqdm(range(n - 1)):

        tensor_img2 = load_tensor_image(test_files[iter + 1], args)

        pose = pose_net(tensor_img1, tensor_img2)

        pose_mat = pose_vec2mat(pose).squeeze(0).cpu().numpy()
        pose_mat = np.vstack([pose_mat, np.array([0, 0, 0, 1])])
        global_pose = global_pose @ np.linalg.inv(pose_mat)

        poses.append(global_pose[0:3, :].reshape(1, 12))

        # update
        tensor_img1 = tensor_img2

    poses = np.concatenate(poses, axis=0)
    filename = Path(args.output_dir + args.sequence + ".txt")
    np.savetxt(filename, poses, delimiter=' ', fmt='%1.8e')
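
The trajectory file written above stores one pose per row as the flattened top 3 x 4 block of its 4 x 4 matrix (the usual KITTI odometry format). A minimal sketch for reading such a file back, assuming it was produced by the np.savetxt call above:

import numpy as np

def load_kitti_poses(path):
    flat = np.loadtxt(path)                                              # [N, 12]
    poses = flat.reshape(-1, 3, 4)                                       # top 3 rows of each 4x4 pose
    bottom = np.tile(np.array([[[0., 0., 0., 1.]]]), (len(poses), 1, 1))
    return np.concatenate([poses, bottom], axis=1)                       # [N, 4, 4]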
Example 7
def validate_with_gt(args,
                     val_loader,
                     depth_net,
                     pose_net,
                     epoch,
                     logger,
                     output_writers=[],
                     **env):
    global device
    batch_time = AverageMeter()
    depth_error_names = ['abs diff', 'abs rel', 'sq rel', 'a1', 'a2', 'a3']
    stab_depth_errors = AverageMeter(i=len(depth_error_names))
    unstab_depth_errors = AverageMeter(i=len(depth_error_names))
    pose_error_names = ['Absolute Trajectory Error', 'Rotation Error']
    pose_errors = AverageMeter(i=len(pose_error_names))

    # switch to evaluate mode
    depth_net.eval()
    pose_net.eval()

    end = time.time()
    logger.valid_bar.update(0)
    for i, sample in enumerate(val_loader):
        log_output = i < len(output_writers)

        imgs = torch.stack(sample['imgs'], dim=1).to(device)
        batch_size, seq, c, h, w = imgs.size()

        intrinsics = sample['intrinsics'].to(device)
        intrinsics_inv = sample['intrinsics_inv'].to(device)

        if args.network_input_size is not None:
            imgs = F.interpolate(imgs, (c, *args.network_input_size),
                                 mode='area')

            downscale = h / args.network_input_size[0]
            intrinsics = torch.cat(
                (intrinsics[:, 0:2] / downscale, intrinsics[:, 2:]), dim=1)
            intrinsics_inv = torch.cat(
                (intrinsics_inv[:, :, 0:2] * downscale, intrinsics_inv[:, :,
                                                                       2:]),
                dim=2)

        GT_depth = sample['depth'].to(device)
        GT_pose = sample['pose'].to(device)

        mid_index = (args.sequence_length - 1) // 2

        tgt_img = imgs[:, mid_index]

        if epoch == 1 and log_output:
            for j, img in enumerate(sample['imgs']):
                output_writers[i].add_image('val Input', tensor2array(img[0]),
                                            j)
            depth_to_show = GT_depth[0].cpu()
            # KITTI Like data routine to discard invalid data
            depth_to_show[depth_to_show == 0] = 1000
            disp_to_show = (1 / depth_to_show).clamp(0, 10)
            output_writers[i].add_image(
                'val target Disparity Normalized',
                tensor2array(disp_to_show, max_value=None, colormap='bone'),
                epoch)

        poses = pose_net(imgs)
        pose_matrices = pose_vec2mat(poses,
                                     args.rotation_mode)  # [B, seq, 3, 4]
        inverted_pose_matrices = invert_mat(pose_matrices)
        pose_errors.update(
            compute_pose_error(GT_pose[:, :-1],
                               inverted_pose_matrices.data[:, :-1]))

        tgt_poses = pose_matrices[:, mid_index]  # [B, 3, 4]
        compensated_predicted_poses = compensate_pose(pose_matrices, tgt_poses)
        compensated_GT_poses = compensate_pose(GT_pose, GT_pose[:, mid_index])

        for j in range(args.sequence_length):
            if j == mid_index:
                if log_output and epoch == 1:
                    output_writers[i].add_image(
                        'val Input Stabilized',
                        tensor2array(sample['imgs'][j][0]), j)
                continue
            '''compute displacement magnitude for each element of batch, and rescale
            depth accordingly.'''

            prior_img = imgs[:, j]
            displacement = compensated_GT_poses[:, j, :, -1]  # [B,3]
            displacement_magnitude = displacement.norm(p=2, dim=1)  # [B]
            current_GT_depth = GT_depth * args.nominal_displacement / displacement_magnitude.view(
                -1, 1, 1)

            prior_predicted_pose = compensated_predicted_poses[:,
                                                               j]  # [B, 3, 4]
            prior_GT_pose = compensated_GT_poses[:, j]

            prior_predicted_rot = prior_predicted_pose[:, :, :-1]
            prior_GT_rot = prior_GT_pose[:, :, :-1].transpose(1, 2)

            prior_compensated_from_GT = inverse_rotate(prior_img, prior_GT_rot,
                                                       intrinsics,
                                                       intrinsics_inv)
            if log_output and epoch == 1:
                depth_to_show = current_GT_depth[0]
                output_writers[i].add_image(
                    'val target Depth {}'.format(j),
                    tensor2array(depth_to_show, max_value=args.max_depth),
                    epoch)
                output_writers[i].add_image(
                    'val Input Stabilized',
                    tensor2array(prior_compensated_from_GT[0]), j)

            prior_compensated_from_prediction = inverse_rotate(
                prior_img, prior_predicted_rot, intrinsics, intrinsics_inv)
            predicted_input_pair = torch.cat(
                [prior_compensated_from_prediction, tgt_img],
                dim=1)  # [B, 6, W, H]
            GT_input_pair = torch.cat([prior_compensated_from_GT, tgt_img],
                                      dim=1)  # [B, 6, W, H]

            # This is the depth from footage stabilized with GT pose, it should be better than depth from raw footage without any GT info
            raw_depth_stab = depth_net(GT_input_pair)
            raw_depth_unstab = depth_net(predicted_input_pair)

            # Upsample depth so that it matches GT size
            scale_factor = GT_depth.size(-1) // raw_depth_stab.size(-1)
            depth_stab = F.interpolate(raw_depth_stab,
                                       scale_factor=scale_factor,
                                       mode='bilinear',
                                       align_corners=False)
            depth_unstab = F.interpolate(raw_depth_unstab,
                                         scale_factor=scale_factor,
                                         mode='bilinear',
                                         align_corners=False)

            for k, depth in enumerate([depth_stab, depth_unstab]):
                disparity = 1 / depth
                errors = stab_depth_errors if k == 0 else unstab_depth_errors
                errors.update(
                    compute_depth_errors(current_GT_depth, depth, crop=True))
                if log_output:
                    prefix = 'stabilized' if k == 0 else 'unstabilized'
                    output_writers[i].add_image(
                        'val {} Dispnet Output Normalized {}'.format(
                            prefix, j),
                        tensor2array(disparity[0],
                                     max_value=None,
                                     colormap='bone'), epoch)
                    output_writers[i].add_image(
                        'val {} Depth Output {}'.format(prefix, j),
                        tensor2array(depth[0], max_value=args.max_depth),
                        epoch)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        logger.valid_bar.update(i + 1)
        if i % args.print_freq == 0:
            logger.valid_writer.write(
                'valid: Time {} ATE Error {:.4f} ({:.4f}), Unstab Rel Abs Error {:.4f} ({:.4f})'
                .format(batch_time, pose_errors.val[0], pose_errors.avg[0],
                        unstab_depth_errors.val[1],
                        unstab_depth_errors.avg[1]))
    logger.valid_bar.update(len(val_loader))

    errors = (*pose_errors.avg, *unstab_depth_errors.avg,
              *stab_depth_errors.avg)
    error_names = (*pose_error_names,
                   *['unstab {}'.format(e) for e in depth_error_names],
                   *['stab {}'.format(e) for e in depth_error_names])

    return OrderedDict(zip(error_names, errors))
Example 8
def validate_without_gt(args,
                        val_loader,
                        depth_net,
                        pose_net,
                        epoch,
                        logger,
                        output_writers=[],
                        **env):
    global device
    batch_time = AverageMeter()
    losses = AverageMeter(i=3, precision=4)
    w1, w2, w3 = args.photo_loss_weight, args.smooth_loss_weight, args.ssim
    if args.log_output:
        poses_values = np.zeros(((len(val_loader) - 1) * args.test_batch_size *
                                 (args.sequence_length - 1), 6))
        disp_values = np.zeros(
            ((len(val_loader) - 1) * args.test_batch_size * 3))

    # switch to evaluate mode
    depth_net.eval()
    pose_net.eval()

    upsample_depth_net = models.UpSampleNet(depth_net, args.network_input_size)

    end = time.time()
    logger.valid_bar.update(0)

    for i, sample in enumerate(val_loader):
        log_output = i < len(output_writers)

        imgs = torch.stack(sample['imgs'], dim=1).to(device)
        intrinsics = sample['intrinsics'].to(device)
        intrinsics_inv = sample['intrinsics_inv'].to(device)

        if epoch == 1 and log_output:
            for j, img in enumerate(sample['imgs']):
                output_writers[i].add_image('val Input', tensor2array(img[0]),
                                            j)

        batch_size, seq = imgs.size()[:2]

        if args.network_input_size is not None:
            h, w = args.network_input_size
            downsample_imgs = F.interpolate(imgs, (3, h, w), mode='area')
            poses = pose_net(downsample_imgs)  # [B, seq, 6]
        else:
            poses = pose_net(imgs)

        pose_matrices = pose_vec2mat(poses,
                                     args.rotation_mode)  # [B, seq, 3, 4]

        mid_index = (args.sequence_length - 1) // 2

        tgt_imgs = imgs[:, mid_index]  # [B, 3, H, W]
        tgt_poses = pose_matrices[:, mid_index]  # [B, 3, 4]
        compensated_poses = compensate_pose(
            pose_matrices,
            tgt_poses)  # [B, seq, 3, 4] tgt_poses are now neutral pose

        ref_indices = list(range(args.sequence_length))
        ref_indices.remove(mid_index)

        loss_1 = 0
        loss_2 = 0

        for ref_index in ref_indices:
            prior_imgs = imgs[:, ref_index]
            prior_poses = compensated_poses[:, ref_index]  # [B, 3, 4]

            prior_imgs_compensated = inverse_rotate(prior_imgs,
                                                    prior_poses[:, :, :3],
                                                    intrinsics, intrinsics_inv)
            input_pair = torch.cat([prior_imgs_compensated, tgt_imgs],
                                   dim=1)  # [B, 6, W, H]

            predicted_magnitude = prior_poses[:, :, -1:].norm(
                p=2, dim=1, keepdim=True).unsqueeze(1)  # [B, 1, 1, 1]
            scale_factor = args.nominal_displacement / predicted_magnitude
            normalized_translation = compensated_poses[:, :, :,
                                                       -1:] * scale_factor  # [B, seq, 3, 1]
            new_pose_matrices = torch.cat(
                [compensated_poses[:, :, :, :-1], normalized_translation],
                dim=-1)

            depth = upsample_depth_net(input_pair)
            disparity = 1 / depth
            total_indices = torch.arange(seq).long().unsqueeze(0).expand(
                batch_size, seq).to(device)
            tgt_id = total_indices[:, mid_index]

            ref_indices = total_indices[
                total_indices != tgt_id.unsqueeze(1)].view(
                    batch_size, seq - 1)

            photo_loss, diff_maps, warped_imgs = photometric_reconstruction_loss(
                imgs,
                tgt_id,
                ref_indices,
                depth,
                new_pose_matrices,
                intrinsics,
                intrinsics_inv,
                args.rotation_mode,
                ssim_weight=w3)

            loss_1 += photo_loss

            if log_output:
                output_writers[i].add_image(
                    'val Dispnet Output Normalized {}'.format(ref_index),
                    tensor2array(disparity[0], max_value=None,
                                 colormap='bone'), epoch)
                output_writers[i].add_image(
                    'val Depth Output {}'.format(ref_index),
                    tensor2array(depth[0].cpu(), max_value=args.max_depth),
                    epoch)
                for j, (diff, warped) in enumerate(zip(diff_maps,
                                                       warped_imgs)):
                    output_writers[i].add_image(
                        'val Warped Outputs {} {}'.format(j, ref_index),
                        tensor2array(warped[0]), epoch)
                    output_writers[i].add_image(
                        'val Diff Outputs {} {}'.format(j, ref_index),
                        tensor2array(diff[0].abs() - 1), epoch)

            loss_2 += texture_aware_smooth_loss(
                disparity, tgt_imgs if args.texture_loss else None)

        if args.log_output and i < len(val_loader) - 1:
            step = args.test_batch_size * (args.sequence_length - 1)
            poses_values[i * step:(i + 1) * step] = poses[:, :-1].cpu().view(
                -1, 6).numpy()
            step = args.test_batch_size * 3
            disp_unraveled = disparity.cpu().view(args.test_batch_size, -1)
            disp_values[i * step:(i + 1) * step] = torch.cat([
                disp_unraveled.min(-1)[0],
                disp_unraveled.median(-1)[0],
                disp_unraveled.max(-1)[0]
            ]).numpy()

        loss = w1 * loss_1 + w2 * loss_2
        losses.update([loss.item(), loss_1.item(), loss_2.item()])

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        logger.valid_bar.update(i + 1)
        if i % args.print_freq == 0:
            logger.valid_writer.write('valid: Time {} Loss {}'.format(
                batch_time, losses))

    if args.log_output:
        rot_coeffs = ['rx', 'ry', 'rz'] if args.rotation_mode == 'euler' else [
            'qx', 'qy', 'qz'
        ]
        tr_coeffs = ['tx', 'ty', 'tz']
        for k, (coeff_name) in enumerate(tr_coeffs + rot_coeffs):
            output_writers[0].add_histogram('val poses_{}'.format(coeff_name),
                                            poses_values[:, k], epoch)
        output_writers[0].add_histogram('disp_values', disp_values, epoch)
    logger.valid_bar.update(len(val_loader))
    return OrderedDict(
        zip(['Total loss', 'Photo loss', 'Smooth loss'], losses.avg))
Example 9
def train_one_epoch(args, train_loader, depth_net, pose_net, optimizer, epoch,
                    n_iter, logger, training_writer, **env):
    global device
    logger.reset_train_bar()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter(precision=4)
    w1, w2, w3 = args.photo_loss_weight, args.smooth_loss_weight, args.ssim
    e1, e2 = args.training_milestones

    # switch to train mode
    depth_net.train()
    pose_net.train()

    upsample_depth_net = models.UpSampleNet(depth_net, args.network_input_size)

    end = time.time()
    logger.train_bar.update(0)

    for i, sample in enumerate(train_loader):

        log_losses = i > 0 and n_iter % args.print_freq == 0
        log_output = args.training_output_freq > 0 and n_iter % args.training_output_freq == 0

        # measure data loading time
        data_time.update(time.time() - end)
        imgs = torch.stack(sample['imgs'], dim=1).to(device)
        intrinsics = sample['intrinsics'].to(device)
        intrinsics_inv = sample['intrinsics_inv'].to(device)

        batch_size, seq = imgs.size()[:2]

        if args.network_input_size is not None:
            h, w = args.network_input_size
            downsample_imgs = F.interpolate(imgs, (3, h, w), mode='area')
            poses = pose_net(downsample_imgs)  # [B, seq, 6]
        else:
            poses = pose_net(imgs)

        pose_matrices = pose_vec2mat(poses,
                                     args.rotation_mode)  # [B, seq, 3, 4]

        total_indices = torch.arange(seq).long().to(device).unsqueeze(
            0).expand(batch_size, seq)
        batch_range = torch.arange(batch_size).long().to(device)
        ''' For each element of the batch, select a random picture in the sequence for
        which we will compute the depth; all poses are then converted so that the pose of this
        very picture is exactly the identity. At first this image is always in the middle of the sequence. '''

        if epoch > e2:
            tgt_id = torch.floor(torch.rand(batch_size) *
                                 seq).long().to(device)
        else:
            tgt_id = torch.zeros(batch_size).long().to(
                device) + args.sequence_length // 2
        '''
        Select the other picture we are going to feed to DepthNet; it must not be the same
        as tgt_id. At first it is always the first picture of the sequence; it is chosen randomly once the first training milestone is reached.
        '''

        ref_indices = total_indices[total_indices != tgt_id.unsqueeze(1)].view(
            batch_size, seq - 1)

        if epoch > e1:
            prior_id = torch.floor(torch.rand(batch_size) *
                                   (seq - 1)).long().to(device)
        else:
            prior_id = torch.zeros(batch_size).long().to(device)
        prior_id = ref_indices[batch_range, prior_id]

        tgt_imgs = imgs[batch_range, tgt_id]  # [B, 3, H, W]
        tgt_poses = pose_matrices[batch_range, tgt_id]  # [B, 3, 4]

        prior_imgs = imgs[batch_range, prior_id]

        compensated_poses = compensate_pose(
            pose_matrices,
            tgt_poses)  # [B, seq, 3, 4] tgt_poses are now neutral pose
        prior_poses = compensated_poses[batch_range, prior_id]  # [B, 3, 4]

        if args.supervise_pose:
            from_GT = invert_mat(sample['pose']).to(device)
            compensated_GT_poses = compensate_pose(
                from_GT, from_GT[batch_range, tgt_id])
            prior_GT_poses = compensated_GT_poses[batch_range, prior_id]
            prior_imgs_compensated = inverse_rotate(prior_imgs,
                                                    prior_GT_poses[:, :, :-1],
                                                    intrinsics, intrinsics_inv)
        else:
            prior_imgs_compensated = inverse_rotate(prior_imgs,
                                                    prior_poses[:, :, :-1],
                                                    intrinsics, intrinsics_inv)

        input_pair = torch.cat([prior_imgs_compensated, tgt_imgs],
                               dim=1)  # [B, 6, W, H]
        depth = upsample_depth_net(input_pair)
        # depth = [sample['depth'].to(device).unsqueeze(1) * 3 / abs(tgt_id[0] - prior_id[0])]
        # depth.append(torch.nn.functional.interpolate(depth[0], scale_factor=2))
        disparities = [1 / d for d in depth]

        predicted_magnitude = prior_poses[:, :,
                                          -1:].norm(p=2, dim=1,
                                                    keepdim=True).unsqueeze(1)
        scale_factor = args.nominal_displacement / (predicted_magnitude + 1e-5)
        normalized_translation = compensated_poses[:, :, :,
                                                   -1:] * scale_factor  # [B, seq_length-1, 3]
        new_pose_matrices = torch.cat(
            [compensated_poses[:, :, :, :-1], normalized_translation], dim=-1)

        biggest_scale = depth[0].size(-1)

        loss_1 = 0
        for k, scaled_depth in enumerate(depth):
            size_ratio = scaled_depth.size(-1) / biggest_scale
            loss, diff_maps, warped_imgs = photometric_reconstruction_loss(
                imgs,
                tgt_id,
                ref_indices,
                scaled_depth,
                new_pose_matrices,
                intrinsics,
                intrinsics_inv,
                args.rotation_mode,
                ssim_weight=w3)

            loss_1 += loss * size_ratio

            if log_output:
                training_writer.add_image(
                    'train Dispnet Output Normalized scale {}'.format(k),
                    tensor2array(disparities[k][0],
                                 max_value=None,
                                 colormap='bone'), n_iter)
                training_writer.add_image(
                    'train Depth Output scale {}'.format(k),
                    tensor2array(scaled_depth[0], max_value=args.max_depth),
                    n_iter)
                for j, (diff, warped) in enumerate(zip(diff_maps,
                                                       warped_imgs)):
                    training_writer.add_image(
                        'train Warped Outputs {} {}'.format(k, j),
                        tensor2array(warped[0]), n_iter)
                    training_writer.add_image(
                        'train Diff Outputs {} {}'.format(k, j),
                        tensor2array(diff.abs()[0] - 1), n_iter)

        loss_2 = texture_aware_smooth_loss(
            depth, tgt_imgs if args.texture_loss else None)

        loss = w1 * loss_1 + w2 * loss_2

        if args.supervise_pose:
            loss += (from_GT[:, :, :, :3] -
                     pose_matrices[:, :, :, :3]).abs().mean()

        if log_losses:
            training_writer.add_scalar('photometric_error', loss_1.item(),
                                       n_iter)
            training_writer.add_scalar('disparity_smoothness_loss',
                                       loss_2.item(), n_iter)
            training_writer.add_scalar('total_loss', loss.item(), n_iter)

        if log_output:
            nominal_translation_magnitude = poses[:, -2, :3].norm(p=2, dim=-1)
            # last pose is always identity and penultimate translation magnitude is always 1, so you don't need to log them
            for j in range(args.sequence_length - 2):
                trans_mag = poses[:, j, :3].norm(p=2, dim=-1)
                training_writer.add_histogram(
                    'tr {}'.format(j),
                    (trans_mag /
                     nominal_translation_magnitude).detach().cpu().numpy(),
                    n_iter)
            for j in range(args.sequence_length - 1):
                # TODO log a better value : this is magnitude of vector (yaw, pitch, roll) which is not a physical value
                rot_mag = poses[:, j, 3:].norm(p=2, dim=-1)
                training_writer.add_histogram('rot {}'.format(j),
                                              rot_mag.detach().cpu().numpy(),
                                              n_iter)

            training_writer.add_image('train Input', tensor2array(tgt_imgs[0]),
                                      n_iter)

        # record loss for average meter
        losses.update(loss.item(), args.batch_size)

        # compute gradient and do Adam step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        with open(args.save_path / args.log_full, 'a') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t')
            writer.writerow([loss.item(), loss_1.item(), loss_2.item()])
        logger.train_bar.update(i + 1)
        if i % args.print_freq == 0:
            logger.train_writer.write('Train: Time {} Data {} Loss {}'.format(
                batch_time, data_time, losses))
        if i >= args.epoch_size - 1:
            break

        n_iter += 1

    return losses.avg[0], n_iter
Example 10
def main():
    global tgt_pc, tgt_img
    args = parser.parse_args()
    from kitti_eval.VOLO_data_utils import test_framework_KITTI as test_framework

    weights = torch.load(args.pretrained_posenet)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1)/3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)
    optimized_errors = np.zeros((len(framework), 2), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))

    for j, sample in enumerate(tqdm(framework)):

        '''
        VO stage
        and compute the error against the ground truth
        '''
        imgs = sample['imgs']

        h,w,_ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in imgs]

        imgs = [np.transpose(img, (2,0,1)) for img in imgs]

        ref_imgs = []
        for i, img in enumerate(imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img/255 - 0.5)/0.5).to(device)
            if i == len(imgs)//2:
                tgt_img = img
            else:
                ref_imgs.append(img)

        timeCostVO=0
        startTimeVO=time.time()
        _, poses = pose_net(tgt_img, ref_imgs)
        timeCostVO=time.time()-startTimeVO

        poses = poses.cpu()[0]
        poses = torch.cat([poses[:len(imgs)//2], torch.zeros(1,6).float(), poses[len(imgs)//2:]])

        inv_transform_matrices = pose_vec2mat(poses, rotation_mode=args.rotation_mode).numpy().astype(np.float64)

        rot_matrices = np.linalg.inv(inv_transform_matrices[:,:,:3])
        tr_vectors = -rot_matrices @ inv_transform_matrices[:,:,-1:]

        transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1)
        print('**********DeepVO result: time_cost {:.3} s'.format(timeCostVO/(len(imgs)-1)))
        #print(transform_matrices)
        first_inv_transform = inv_transform_matrices[0]
        final_poses = first_inv_transform[:,:3] @ transform_matrices
        final_poses[:,:,-1:] += first_inv_transform[:,-1:]
        # print('first')
        # print(first_inv_transform)
        print('poses')
        print(final_poses)

        if args.output_dir is not None:
            predictions_array[j] = final_poses

        ATE, RE = compute_pose_error(sample['poses'], final_poses)
        errors[j] = ATE, RE



        '''
        LO stage
        use the VO result as the initial estimate, and compute the error against the ground truth
        '''
        pointclouds=sample['pointclouds']
        from VOLO import LO
        # the point clouds can be processed directly
        for i, pc in enumerate(pointclouds):
            if i == len(pointclouds)//2:

                tgt_pc =pointclouds[i]

        optimized_transform_matrices=[]

        timeCostLO=0
        startTimeLO=time.time()
        totalIterations=0
        for i,pc in enumerate(pointclouds):
            pose_proposal=np.identity(4)
            pose_proposal[:3,:]=transform_matrices[i]
            print('======pose proposal for LO=====')
            print(pose_proposal)

            T, distances, iterations = LO(pc, tgt_pc, init_pose=pose_proposal, max_iterations=50, tolerance=0.001, LO='icp')
            optimized_transform_matrices.append(T)
            totalIterations+=iterations
            print('iterations:\n')
            print(iterations)
        timeCostLO=time.time()-startTimeLO
        optimized_transform_matrices=np.asarray(optimized_transform_matrices)

        print('*****LO result: time_cost {:.3} s'.format(timeCostLO/(len(pointclouds)-1))+' average iterations: {}'
              .format(totalIterations/(len(pointclouds)-1)))
        # print(optimized_transform_matrices)


        #TODO complete the VO-LO pipeline: align the transform-matrix formats; evaluate the gain of the VO proposal for LO, in both efficiency and accuracy; visualize the evaluation process
        #TODO use a dataset that provides matched images, point clouds and ground-truth poses (KITTI odometry)

        inv_optimized_rot_matrices = np.linalg.inv(optimized_transform_matrices[:,:3,:3])
        inv_optimized_tr_vectors = -inv_optimized_rot_matrices @ optimized_transform_matrices[:,:3,-1:]
        inv_optimized_transform_matrices = np.concatenate([inv_optimized_rot_matrices, inv_optimized_tr_vectors], axis=-1)

        first_inv_optimized_transform = inv_optimized_transform_matrices[0]
        final_optimized_poses = first_inv_optimized_transform[:,:3] @ optimized_transform_matrices[:,:3,:]
        final_optimized_poses[:,:,-1:] += first_inv_optimized_transform[:,-1:]
        # print('first')
        # print(first_inv_optimized_transform)
        print('poses')
        print(final_optimized_poses)

        if args.output_dir is not None:
            predictions_array[j] = final_poses

        optimized_ATE, optimized_RE = compute_pose_error(sample['poses'], final_optimized_poses)
        optimized_errors[j] = optimized_ATE, optimized_RE

        print('==============\n===============\n')

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE','RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

    optimized_mean_errors = optimized_errors.mean(0)
    optimized_std_errors = optimized_errors.std(0)
    optimized_error_names = ['optimized_ATE','optimized_RE']
    print('')
    print("optimized_Results")
    print("\t {:>10}, {:>10}".format(*optimized_error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*optimized_mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*optimized_std_errors))

    if args.output_dir is not None:
        np.save(output_dir/'predictions.npy', predictions_array)
Example 11
def adjust_shifts(args, train_set, adjust_loader, depth_net, pose_net, epoch,
                  logger, training_writer, **env):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    new_shifts = AverageMeter(args.sequence_length - 1, precision=2)
    pose_net.eval()
    depth_net.eval()
    upsample_depth_net = models.UpSampleNet(depth_net, args.network_input_size)

    end = time.time()

    mid_index = (args.sequence_length - 1) // 2

    # we constrain the mean value of the depth net output from pair 0 and mid_index
    target_values = np.arange(
        -mid_index, mid_index + 1) / (args.target_mean_depth * mid_index)
    target_values = 1 / np.abs(
        np.concatenate(
            [target_values[:mid_index], target_values[mid_index + 1:]]))

    logger.reset_train_bar(len(adjust_loader))

    for i, sample in enumerate(adjust_loader):
        index = sample['index']

        # measure data loading time
        data_time.update(time.time() - end)
        imgs = torch.stack(sample['imgs'], dim=1).to(device)
        intrinsics = sample['intrinsics'].to(device)
        intrinsics_inv = sample['intrinsics_inv'].to(device)

        # compute output
        batch_size, seq = imgs.size()[:2]

        if args.network_input_size is not None:
            h, w = args.network_input_size
            downsample_imgs = F.interpolate(imgs, (3, h, w), mode='area')
            poses = pose_net(downsample_imgs)  # [B, seq, 6]
        else:
            poses = pose_net(imgs)

        pose_matrices = pose_vec2mat(poses,
                                     args.rotation_mode)  # [B, seq, 3, 4]

        tgt_imgs = imgs[:, mid_index]  # [B, 3, H, W]
        tgt_poses = pose_matrices[:, mid_index]  # [B, 3, 4]
        compensated_poses = compensate_pose(
            pose_matrices,
            tgt_poses)  # [B, seq, 3, 4] tgt_poses are now neutral pose

        ref_indices = list(range(args.sequence_length))
        ref_indices.remove(mid_index)

        mean_depth_batch = []

        for ref_index in ref_indices:
            prior_imgs = imgs[:, ref_index]
            prior_poses = compensated_poses[:, ref_index]  # [B, 3, 4]

            prior_imgs_compensated = inverse_rotate(prior_imgs,
                                                    prior_poses[:, :, :3],
                                                    intrinsics, intrinsics_inv)
            input_pair = torch.cat([prior_imgs_compensated, tgt_imgs],
                                   dim=1)  # [B, 6, W, H]

            depth = upsample_depth_net(input_pair)  # [B, 1, H, W]
            mean_depth = depth.view(batch_size, -1).mean(-1).cpu().numpy()  # B
            mean_depth_batch.append(mean_depth)

        for j, mean_values in zip(index, np.stack(mean_depth_batch, axis=-1)):
            ratio = mean_values / target_values  # if mean value is too high, raise the shift, lower otherwise
            train_set.reset_shifts(j, ratio[:mid_index], ratio[mid_index:])
            new_shifts.update(train_set.get_shifts(j))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        logger.train_bar.update(i)
        if i % args.print_freq == 0:
            logger.train_writer.write('Adjustment: '
                                      'Time {} Data {} shifts {}'.format(
                                          batch_time, data_time, new_shifts))

    for i, shift in enumerate(new_shifts.avg):
        training_writer.add_scalar('shifts{}'.format(i), shift, epoch)

    return new_shifts.avg
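
For intuition about the target_values computed at the top of adjust_shifts: the target mean depth is inversely proportional to the frame offset from the target image. A quick check with hypothetical settings (sequence_length = 5, so mid_index = 2, and target_mean_depth = 20):

import numpy as np

mid_index = 2
target_mean_depth = 20.0
vals = np.arange(-mid_index, mid_index + 1) / (target_mean_depth * mid_index)
vals = 1 / np.abs(np.concatenate([vals[:mid_index], vals[mid_index + 1:]]))
print(vals)  # [20. 40. 40. 20.] -> the closer reference frames get the larger mean-depth target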
Example 12
def main():
    args = parser.parse_args()
    # from kitti_eval.VOLO_data_utils import test_framework_KITTI as test_framework
    from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework

    weights = torch.load(args.pretrained_posenet)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    sequences=[args.sequence_idx]
    framework = test_framework(dataset_dir, sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)
    optimized_errors = np.zeros((len(framework), 2), np.float32)
    iteration_arr = np.zeros(len(framework))
    LO_iter_times = np.zeros(len(framework))

    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))
        abs_VO_poses = np.zeros((len(framework), 12))

    abs_VO_pose = np.identity(4)
    last_pose = np.identity(4)
    last_VO_pose = np.identity(4)

    # transform between the LiDAR (L) and camera (C) frames, used to align input poses to the LiDAR coordinate frame
    Transform_matrix_L2C = np.identity(4)
    Transform_matrix_L2C[:3, :3] = np.array([[7.533745e-03, -9.999714e-01, -6.166020e-04],
                                             [1.480249e-02, 7.280733e-04, -9.998902e-01],
                                             [9.998621e-01, 7.523790e-03, 1.480755e-02]])
    Transform_matrix_L2C[:3, -1:] = np.array([-4.069766e-03, -7.631618e-02, -2.717806e-01]).reshape(3, 1)

    Transform_matrix_C2L = np.linalg.inv(Transform_matrix_L2C)

    pointClouds = loadPointCloud(args.dataset_dir + "/sequences/" + args.sequence_idx + "/velodyne")

    # ************* visualization setup ***********************
    num_frames = len(tqdm(framework))
    # Pose Graph Manager (for back-end optimization) initialization
    PGM = PoseGraphManager()
    PGM.addPriorFactor()

    # Result saver
    save_dir = "result/" + args.sequence_idx
    if not os.path.exists(save_dir): os.makedirs(save_dir)
    ResultSaver = PoseGraphResultSaver(init_pose=PGM.curr_se3,
                                       save_gap=args.save_gap,
                                       num_frames=num_frames,
                                       seq_idx=args.sequence_idx,
                                       save_dir=save_dir)

    # Scan Context Manager (for loop detection) initialization
    SCM = ScanContextManager(shape=[args.num_rings, args.num_sectors],
                             num_candidates=args.num_candidates,
                             threshold=args.loop_threshold)

    # for save the results as a video
    fig_idx = 1
    fig = plt.figure(fig_idx)
    writer = FFMpegWriter(fps=15)
    video_name = args.sequence_idx + "_" + str(args.num_icp_points) + "_prop@" + str(args.proposal) + "_tol@" + str(
        args.tolerance) + ".mp4"
    num_frames_to_skip_to_show = 5
    num_frames_to_save = np.floor(num_frames / num_frames_to_skip_to_show)
    with writer.saving(fig, video_name, num_frames_to_save):  # this video saving part is optional

        for j, sample in enumerate(tqdm(framework)):

            '''
            ***************************************VO stage*******************************************
            '''
            imgs = sample['imgs']

            h, w, _ = imgs[0].shape
            if (not args.no_resize) and (h != args.img_height or w != args.img_width):
                imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in imgs]

            imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

            ref_imgs = []
            for i, img in enumerate(imgs):
                img = torch.from_numpy(img).unsqueeze(0)
                img = ((img / 255 - 0.5) / 0.5).to(device)
                if i == len(imgs) // 2:
                    tgt_img = img
                else:
                    ref_imgs.append(img)

            startTimeVO = time.time()
            _, poses = pose_net(tgt_img, ref_imgs)
            timeCostVO = time.time() - startTimeVO

            poses = poses.cpu()[0]
            poses = torch.cat([poses[:len(imgs) // 2], torch.zeros(1, 6).float(), poses[len(imgs) // 2:]])

            inv_transform_matrices = pose_vec2mat(poses, rotation_mode=args.rotation_mode).numpy().astype(np.float64)

            rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3])
            tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]

            transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1)
            print('**********DeepVO result: time_cost {:.3} s'.format(timeCostVO / (len(imgs) - 1)))
            # print(transform_matrices)
            # convert the transforms relative to the middle frame 1 of [0 1 2] into poses relative to frame 0
            first_inv_transform = inv_transform_matrices[0]
            final_poses = first_inv_transform[:, :3] @ transform_matrices
            final_poses[:, :, -1:] += first_inv_transform[:, -1:]
            # print('first')
            # print(first_inv_transform)
            print('poses')
            print(final_poses)

            # cur_VO_pose takes the 2nd entry of final_poses, i.e. T10, T21, T32, ...
            cur_VO_pose = np.identity(4)
            cur_VO_pose[:3, :] = final_poses[1]
            print("对齐前未有尺度修正的帧间位姿")
            print(cur_VO_pose)

            print("last_pose")
            print(last_pose)
            print("last_VO_pose")
            print(last_VO_pose)

            # Scale factor: the ratio between the previous frame's LO pose and its VO pose
            # (translation magnitudes) is used as the scale factor for the current frame; the initial scale is 1
            if j == 0:
                scale_factor = 7
            else:
                scale_factor = math.sqrt(np.sum(last_pose[:3, -1] ** 2) / np.sum(last_VO_pose[:3, -1] ** 2))
                print("numerator", np.sum(last_pose[:3, -1] ** 2))
                print("denominator", np.sum(last_VO_pose[:3, -1] ** 2))
            last_VO_pose = copy.deepcopy(cur_VO_pose)  # note: deep copy
            print("scale factor:", scale_factor)

            # apply the scale correction first, then align the coordinate frame
            cur_VO_pose[:3, -1:] = cur_VO_pose[:3, -1:] * scale_factor
            print("after scale correction...")
            print(cur_VO_pose)
            cur_VO_pose = Transform_matrix_C2L @ cur_VO_pose @ np.linalg.inv(Transform_matrix_C2L)

            print("inter-frame pose aligned to the LiDAR coordinate frame")
            print(cur_VO_pose)

            '''*************************LO stage******************************************'''
            tgt_pc = random_sampling(pointClouds[j], 5000)
            pc = random_sampling(pointClouds[j + 1], 5000)

            from point_cloud_processing.icpImpl import icp
            if args.proposal == 0:
                init_pose = None
            elif args.proposal == 1:
                init_pose = last_pose
            elif args.proposal == 2:
                init_pose = cur_VO_pose

            startTimeLO = time.time()
            odom_transform, distances, iterations = icp(pc, tgt_pc, init_pose=init_pose, tolerance=args.tolerance,
                                                        max_iterations=50)
            iter_time = time.time() - startTimeLO
            LO_iter_times[j] = iter_time
            iteration_arr[j] = iterations

            last_pose = odom_transform

            print("LO-refined pose, mean_dis: ", np.asarray(distances).mean())
            print(odom_transform)
            print("LO iteration count:", iterations)

            PGM.curr_node_idx = j  # make start with 0
            if (PGM.curr_node_idx == 0):
                PGM.prev_node_idx = PGM.curr_node_idx
                continue
            # update the current (moved) pose
            PGM.curr_se3 = np.matmul(PGM.curr_se3, odom_transform)

            # add the odometry factor to the graph
            # PGM.addOdometryFactor(cur_VO_pose)

            # renewal the prev information
            PGM.prev_node_idx = PGM.curr_node_idx

            # loop detection and optimize the graph
            if (PGM.curr_node_idx > 1 and PGM.curr_node_idx % args.try_gap_loop_detection == 0):
                # 1/ loop detection
                loop_idx, loop_dist, yaw_diff_deg = SCM.detectLoop()
                if (loop_idx == None):  # NOT FOUND
                    pass
                # else:
                #     print("Loop event detected: ", PGM.curr_node_idx, loop_idx, loop_dist)
                #     # 2-1/ add the loop factor
                #     loop_scan_down_pts = SCM.getPtcloud(loop_idx)
                #     loop_transform, _, _ = ICP.icp(curr_scan_down_pts, loop_scan_down_pts,
                #                                    init_pose=yawdeg2se3(yaw_diff_deg), max_iterations=20)
                #     PGM.addLoopFactor(loop_transform, loop_idx)
                #
                #     # 2-2/ graph optimization
                #     PGM.optimizePoseGraph()
                #
                #     # 2-2/ save optimized poses
                #     ResultSaver.saveOptimizedPoseGraphResult(PGM.curr_node_idx, PGM.graph_optimized)

            # save the ICP odometry pose result (no loop closure)
            ResultSaver.saveUnoptimizedPoseGraphResult(PGM.curr_se3, PGM.curr_node_idx)
            if (j % num_frames_to_skip_to_show == 0):
                ResultSaver.vizCurrentTrajectory(fig_idx=fig_idx)
                writer.grab_frame()

            if args.output_dir is not None:
                predictions_array[j] = final_poses
                abs_VO_poses[j] = abs_VO_pose[:3, :].reshape(-1, 12)[0]

            ATE, RE = compute_pose_error(sample['poses'], final_poses)
            errors[j] = ATE, RE

            optimized_ATE, optimized_RE = compute_LO_pose_error(sample['poses'], odom_transform, Transform_matrix_L2C)
            optimized_errors[j] = optimized_ATE, optimized_RE

        # accuracy metrics of the VO poses
        mean_errors = errors.mean(0)
        std_errors = errors.std(0)
        error_names = ['ATE', 'RE']
        print('')
        print("VO_Results")
        print("\t {:>10}, {:>10}".format(*error_names))
        print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
        print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

        # accuracy metrics after the LO refinement
        optimized_mean_errors = optimized_errors.mean(0)
        optimized_std_errors = optimized_errors.std(0)
        optimized_error_names = ['optimized_ATE', 'optimized_RE']
        print('')
        print("LO_optimized_Results")
        print("\t {:>10}, {:>10}".format(*optimized_error_names))
        print("mean \t {:10.4f}, {:10.4f}".format(*optimized_mean_errors))
        print("std \t {:10.4f}, {:10.4f}".format(*optimized_std_errors))

        # iteration counts
        mean_iterations = iteration_arr.mean()
        std_iterations = iteration_arr.std()
        _names = ['iteration']
        print('')
        print("LO iteration count")
        print("\t {:>10}".format(*_names))
        print("mean \t {:10.4f}".format(mean_iterations))
        print("std \t {:10.4f}".format(std_iterations))

        # iteration time
        mean_iter_time = LO_iter_times.mean()
        std_iter_time = LO_iter_times.std()
        _names = ['iter_time']
        print('')
        print("LO iteration time (s)")
        print("\t {:>10}".format(*_names))
        print("mean \t {:10.4f}".format(mean_iter_time))
        print("std \t {:10.4f}".format(std_iter_time))

        if args.output_dir is not None:
            np.save(output_dir / 'predictions.npy', predictions_array)
            np.savetxt(output_dir / 'abs_VO_poses.txt', abs_VO_poses)
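
The alignment step above, cur_VO_pose = Transform_matrix_C2L @ cur_VO_pose @ np.linalg.inv(Transform_matrix_C2L), is the standard change of basis for a relative transform. A standalone sketch with hypothetical names:

import numpy as np

def change_frame(T_rel_cam, T_cam_to_lidar):
    # express a relative pose measured in the camera frame in the LiDAR frame:
    # T_lidar = T_C2L @ T_cam @ T_C2L^-1
    return T_cam_to_lidar @ T_rel_cam @ np.linalg.inv(T_cam_to_lidar)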
Example 13
def main():
    args = parser.parse_args()
    from sintel_eval.pose_evaluation_utils import test_framework_Sintel as test_framework

    weights = torch.load(args.pretrained_posenet)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = getattr(models, args.posenet)(nb_ref_imgs=seq_length - 1).cuda()
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    RE = np.zeros((len(framework)), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))

    for j, sample in enumerate(tqdm(framework)):
        imgs = sample['imgs']

        h, w, _ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height
                                     or w != args.img_width):
            imgs = [
                imresize(img,
                         (args.img_height, args.img_width)).astype(np.float32)
                for img in imgs
            ]

        imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

        ref_imgs_var = []
        for i, img in enumerate(imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.5).cuda()
            img_var = Variable(img, volatile=True)
            if i == len(imgs) // 2:
                tgt_img_var = img_var
            else:
                ref_imgs_var.append(Variable(img, volatile=True))

        if args.posenet in ["PoseNet6", "PoseNetB6"]:
            poses = pose_net(tgt_img_var, ref_imgs_var)
        else:
            _, poses = pose_net(tgt_img_var, ref_imgs_var)

        poses = poses.cpu().data[0]
        poses = torch.cat([
            poses[:len(imgs) // 2],
            torch.zeros(1, 6).float(), poses[len(imgs) // 2:]
        ])

        inv_transform_matrices = pose_vec2mat(
            Variable(poses),
            rotation_mode=args.rotation_mode).data.numpy().astype(np.float64)

        rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3])
        tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]

        transform_matrices = np.concatenate([rot_matrices, tr_vectors],
                                            axis=-1)

        first_inv_transform = inv_transform_matrices[0]
        final_poses = first_inv_transform[:, :3] @ transform_matrices
        final_poses[:, :, -1:] += first_inv_transform[:, -1:]

        if args.output_dir is not None:
            predictions_array[j] = final_poses

        RE[j] = compute_pose_error(sample['poses'], final_poses)

    print('')
    print("Results")
    print("\t {:>10}".format('RE'))
    print("mean \t {:10.4f}".format(RE.mean()))
    print("std \t {:10.4f}".format(RE.std()))

    if args.output_dir is not None:
        np.save(output_dir / 'predictions.npy', predictions_array)
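
This snippet uses the pre-0.4 PyTorch Variable(..., volatile=True) API. On current PyTorch the same inference-only pass would normally be wrapped in torch.no_grad(); a minimal sketch of the equivalent inner loop, reusing the snippet's names (imgs, pose_net):

import torch

with torch.no_grad():
    ref_imgs = []
    for i, img in enumerate(imgs):
        img = torch.from_numpy(img).unsqueeze(0)
        img = ((img / 255 - 0.5) / 0.5).cuda()
        if i == len(imgs) // 2:
            tgt_img = img
        else:
            ref_imgs.append(img)
    _, poses = pose_net(tgt_img, ref_imgs)  # PoseNet6/PoseNetB6 return poses directly (no leading output)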
Example 14
def train(args, train_loader, disp_net, pose_exp_net, optimizer, epoch_size,
          logger, tb_writer):
    global n_iter, device
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter(precision=4)
    w1, w2, w3 = args.photo_loss_weight, args.mask_loss_weight, args.smooth_loss_weight

    # switch to train mode
    disp_net.train()
    pose_exp_net.train()

    end = time.time()
    logger.train_bar.update(0)

    for i, (tgt_img, ref_imgs, intrinsics,
            intrinsics_inv) in enumerate(train_loader):
        log_losses = i > 0 and n_iter % args.print_freq == 0
        log_output = args.training_output_freq > 0 and n_iter % args.training_output_freq == 0

        # measure data loading time
        data_time.update(time.time() - end)
        tgt_img = tgt_img.to(device)
        ref_imgs = [img.to(device) for img in ref_imgs]
        intrinsics = intrinsics.to(device)

        # compute output
        disparities = disp_net(tgt_img)
        depth = [1 / disp for disp in disparities]
        #         print("***",len(depth),depth[0].size())
        explainability_mask, pose = pose_exp_net(tgt_img, ref_imgs)

        loss_1, warped, diff = photometric_reconstruction_loss(
            tgt_img, ref_imgs, intrinsics, depth, explainability_mask, pose,
            args.rotation_mode, args.padding_mode)
        if w2 > 0:
            loss_2 = explainability_loss(explainability_mask)
        else:
            loss_2 = 0
        loss_3 = smooth_loss(depth)

        loss = w1 * loss_1 + w2 * loss_2 + w3 * loss_3

        if args.with_photocon_loss:
            batch_size = pose.size()[0]
            homo_row = torch.tensor([[0, 0, 0, 1]],
                                    dtype=torch.float).to(device)
            homo_row = homo_row.unsqueeze(0).expand(batch_size, -1, -1)
            T21 = pose_vec2mat(pose[:, 0])
            T21 = torch.cat((T21, homo_row), 1)
            T12 = torch.inverse(T21)
            T23 = pose_vec2mat(pose[:, 1])
            T23 = torch.cat((T23, homo_row), 1)
            T13 = torch.matmul(T23, T12)  #[B, 4, 4]
            #             print("----",T13.size())
            # target = 1 and ref = 3
            ref_img_warped, valid_points = inverse_warp_posemat(
                ref_imgs[1], depth[0][:, 0], T13, intrinsics,
                args.rotation_mode, args.padding_mode)
            diff = (ref_imgs[0] -
                    ref_img_warped) * valid_points.unsqueeze(1).float()
            loss_4 = diff.abs().mean()

            loss += loss_4

        if log_losses:
            tb_writer.add_scalar('photometric_error', loss_1.item(), n_iter)
            if w2 > 0:
                tb_writer.add_scalar('explanability_loss', loss_2.item(),
                                     n_iter)
            tb_writer.add_scalar('disparity_smoothness_loss', loss_3.item(),
                                 n_iter)
            tb_writer.add_scalar('total_loss', loss.item(), n_iter)

        if log_output:
            tb_writer.add_image('train Input', tensor2array(tgt_img[0]),
                                n_iter)
            for k, scaled_maps in enumerate(
                    zip(depth, disparities, warped, diff,
                        explainability_mask)):
                log_output_tensorboard(tb_writer, "train", 0, " {}".format(k),
                                       n_iter, *scaled_maps)

        # record loss and EPE
        losses.update(loss.item(), args.batch_size)

        # compute gradient and do Adam step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        with open(args.save_path / args.log_full, 'a') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t')
            writer.writerow([
                loss.item(),
                loss_1.item(),
                loss_2.item() if w2 > 0 else 0,
                loss_3.item()
            ])
        logger.train_bar.update(i + 1)
        if i % args.print_freq == 0:
            logger.train_writer.write('Train: Time {} Data {} Loss {}'.format(
                batch_time, data_time, losses))
        if i >= epoch_size - 1:
            break

        n_iter += 1

    return losses.avg[0]
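
The optional photo-consistency term above chains the two predicted relative poses through the target frame, T13 = T23 @ inv(T21), after padding each 3x4 pose to a homogeneous 4x4 matrix. A small self-contained sketch of that composition (the helper name is ours):

import torch

def chain_relative_poses(T21, T23):
    """Compose two [B, 3, 4] relative poses into T13 = T23 @ inv(T21)."""
    b = T21.size(0)
    bottom = torch.tensor([0., 0., 0., 1.], device=T21.device).view(1, 1, 4).expand(b, 1, 4)
    T21h = torch.cat([T21, bottom], dim=1)   # [B, 4, 4]
    T23h = torch.cat([T23, bottom], dim=1)   # [B, 4, 4]
    return T23h @ torch.inverse(T21h)        # [B, 4, 4], frame 1 -> frame 3

In the loop above this corresponds to chain_relative_poses(pose_vec2mat(pose[:, 0]), pose_vec2mat(pose[:, 1])), whose result is then passed to inverse_warp_posemat.
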
Esempio n. 15
0
def main():
    args = parser.parse_args()

    weights = torch.load(args.pretrained_posenet)
    pose_net = models.PoseNet().to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)
    pose_net.eval()

    seq_length = 5
    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)
    print('{} snippets to test'.format(len(framework)))

    errors = np.zeros((len(framework), 2), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))

    for j, sample in enumerate(tqdm(framework)):
        imgs = sample['imgs']

        h, w, _ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height
                                     or w != args.img_width):
            imgs = [
                imresize(img,
                         (args.img_height, args.img_width)).astype(np.float32)
                for img in imgs
            ]

        imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

        tensor_imgs = []
        for i, img in enumerate(imgs):
            img = ((torch.from_numpy(img).unsqueeze(0) / 255 - 0.5) /
                   0.5).to(device)
            tensor_imgs.append(img)

        global_pose = np.identity(4)
        poses = []
        poses.append(global_pose[0:3, :])

        for iter in range(seq_length - 1):
            pose = pose_net(tensor_imgs[iter], tensor_imgs[iter + 1])
            pose_mat = pose_vec2mat(pose).squeeze(0).cpu().numpy()
            pose_mat = np.vstack([pose_mat, np.array([0, 0, 0, 1])])

            global_pose = global_pose @ np.linalg.inv(pose_mat)
            poses.append(global_pose[0:3, :])

        final_poses = np.stack(poses, axis=0)

        if args.output_dir is not None:
            predictions_array[j] = final_poses

        ATE, RE = compute_pose_error(sample['poses'], final_poses)
        errors[j] = ATE, RE

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE', 'RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

    if args.output_dir is not None:
        np.save(output_dir / 'predictions.npy', predictions_array)
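
compute_pose_error is imported from the evaluation utilities and is not shown here; a plausible minimal version, assuming both arguments are [seq, 3, 4] arrays, aligns the translation scale before measuring ATE and accumulates the residual rotation angle for RE:

import numpy as np

def compute_pose_error(gt, pred):
    """ATE: scale-aligned translation error; RE: mean residual rotation angle (radians)."""
    seq = gt.shape[0]
    scale = np.sum(gt[:, :, -1] * pred[:, :, -1]) / np.sum(pred[:, :, -1] ** 2)
    ATE = np.linalg.norm((gt[:, :, -1] - scale * pred[:, :, -1]).reshape(-1)) / seq
    RE = 0
    for gt_pose, pred_pose in zip(gt, pred):
        R = gt_pose[:, :3] @ np.linalg.inv(pred_pose[:, :3])   # residual rotation
        s = np.linalg.norm([R[0, 1] - R[1, 0], R[1, 2] - R[2, 1], R[0, 2] - R[2, 0]])
        c = np.trace(R) - 1
        RE += np.arctan2(s, c)   # s = 2 sin(theta), c = 2 cos(theta)
    return ATE, RE / seq
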
Esempio n. 16
0
def main():
    args = parser.parse_args()
    attack = False
    if args.perturbation and args.tracker_file:
        attack = True
        perturbation = np.load(Path(args.perturbation))
        noise_mask = np.load(Path(args.tracker_file))

    from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework

    weights = torch.load(args.pretrained_posenet, map_location=device)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1,
                          output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))
        ground_truth_array = np.zeros((len(framework), seq_length, 3, 4))

    for j, sample in enumerate(tqdm(framework)):
        imgs = sample['imgs']

        h, w, _ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height
                                     or w != args.img_width):
            imgs = [
                imresize(img,
                         (args.img_height, args.img_width)).astype(np.float32)
                for img in imgs
            ]

        imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

        ref_imgs = []
        for i, img in enumerate(imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.5).to(device)
            if i == len(imgs) // 2:
                tgt_img = img
            else:
                ref_imgs.append(img)

        if attack:
            # Add noise to target image
            if j + 2 >= first_frame and j + 2 < last_frame:
                curr_mask = noise_mask[j - first_frame + 2].astype(int)
                w = curr_mask[2] - curr_mask[0]
                h = curr_mask[3] - curr_mask[1]
                noise_box = resize2d(perturbation, (h, w))
                tgt_img[0][:, curr_mask[1]:curr_mask[3],
                           curr_mask[0]:curr_mask[2]] += noise_box
                tgt_img[0] = tgt_img[0].clamp(-1, 1)

            # Add noise to reference images
            for k in range(5):
                ref_idx = k
                if k == 2:
                    # Skip target image
                    continue
                if k > 2:
                    # Since it is numbered: ref1, ref2, tgt, ref3, ref4
                    ref_idx = k - 1
                if j + k >= first_frame and j + k < last_frame:
                    curr_mask = noise_mask[j - first_frame + k].astype(int)
                    w = curr_mask[2] - curr_mask[0]
                    h = curr_mask[3] - curr_mask[1]
                    noise_box = resize2d(perturbation, (h, w))
                    ref_imgs[ref_idx][
                        0][:, curr_mask[1]:curr_mask[3],
                           curr_mask[0]:curr_mask[2]] += noise_box
                    ref_imgs[ref_idx] = ref_imgs[ref_idx].clamp(-1, 1)

        _, poses = pose_net(tgt_img, ref_imgs)
        poses = poses.cpu()[0]
        poses = torch.cat([
            poses[:len(imgs) // 2],
            torch.zeros(1, 6).float(), poses[len(imgs) // 2:]
        ])
        inv_transform_matrices = pose_vec2mat(
            poses, rotation_mode=args.rotation_mode).numpy().astype(np.float64)

        rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3])
        tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]

        transform_matrices = np.concatenate([rot_matrices, tr_vectors],
                                            axis=-1)

        first_inv_transform = inv_transform_matrices[0]
        final_poses = first_inv_transform[:, :3] @ transform_matrices
        final_poses[:, :, -1:] += first_inv_transform[:, -1:]

        if args.output_dir is not None:
            ground_truth_array[j] = sample['poses']
            predictions_array[j] = final_poses

        ATE, RE = compute_pose_error(sample['poses'], final_poses)
        errors[j] = ATE, RE

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE', 'RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

    if args.output_dir is not None:
        np.save(output_dir / 'ground_truth.npy', ground_truth_array)
        np.save(output_dir / 'predictions_perturbed.npy', predictions_array)
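
resize2d is not defined in this snippet; a plausible implementation, assuming the perturbation patch has already been converted to a [C, H, W] torch tensor on the right device, simply rescales it to the tracker box with bilinear interpolation:

import torch
import torch.nn.functional as F

def resize2d(patch, size):
    """Bilinearly resize a [C, H, W] (or [1, C, H, W]) tensor to size = (h, w)."""
    if patch.dim() == 3:
        patch = patch.unsqueeze(0)
    resized = F.interpolate(patch, size=size, mode='bilinear', align_corners=False)
    return resized.squeeze(0)
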
Esempio n. 17
0
def main():
    args = parser.parse_args()

    weights_pose = torch.load(args.pretrained_posenet)
    pose_net = models.PoseResNet().to(device)
    pose_net.load_state_dict(weights_pose['state_dict'], strict=False)
    pose_net.eval()

    image_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    test_files = sum(
        [image_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])
    test_files.sort()

    print('{} files to test'.format(len(test_files)))
    #print(test_files)

    global_pose = np.eye(4)
    if 'advio-04' in args.dataset_dir:
        global_pose = np.array([[
            0.2337503561460601, 0.71124984004749991, -0.66293618387207998,
            0.0037387620000000001
        ],
                                [
                                    0.10487759004941999, 0.65940272844750003,
                                    0.74443851971943986, 0.082704390000000003
                                ],
                                [
                                    0.96662371684119996, -0.24353990138999998,
                                    0.079541459813400106, -0.023607989999999999
                                ], [0.0, 0.0, 0.0, 1.0]])  # advio-04
    elif 'advio-23' in args.dataset_dir:
        global_pose = np.array([[
            -0.02024017794777988, -0.7758527702428, 0.630589204478,
            -0.005759389
        ], [0.11342629147720007, 0.62486446287, 0.7724499038534, -0.09049783],
                                [
                                    -0.993340194646, 0.08715994761339996,
                                    0.0753547505262201, -0.001374606
                                ], [0.0, 0.0, 0.0, 1.0]])  # advio-23
    print(str(global_pose))
    poses = [global_pose[0:3, :].reshape(1, 12)]

    n = len(test_files)
    tensor_img1 = load_tensor_image(test_files[0], args)

    for iter in tqdm(range(n - 1)):

        tensor_img2 = load_tensor_image(test_files[iter + 1], args)

        pose = pose_net(tensor_img1, tensor_img2)

        pose_mat = pose_vec2mat(pose).squeeze(0).cpu().numpy()
        pose_mat = np.vstack([pose_mat, np.array([0, 0, 0, 1])])
        global_pose = global_pose @ np.linalg.inv(pose_mat)

        poses.append(global_pose[0:3, :].reshape(1, 12))

        # update
        tensor_img1 = tensor_img2

    poses = np.concatenate(poses, axis=0)
    filename = Path(args.output_dir + args.sequence + ".txt")
    np.savetxt(filename, poses, delimiter=' ', fmt='%1.8e')
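
The trajectory is written as one 12-value row per frame (the flattened top three rows of each 4x4 pose), the usual KITTI odometry layout. A short sketch of reading such a file back into homogeneous matrices:

import numpy as np

def load_kitti_poses(filename):
    """Read an N x 12 pose file and return an [N, 4, 4] array of homogeneous poses."""
    flat = np.loadtxt(filename)              # [N, 12]
    poses = flat.reshape(-1, 3, 4)
    bottom = np.tile([0., 0., 0., 1.], (poses.shape[0], 1, 1))   # [N, 1, 4]
    return np.concatenate([poses, bottom], axis=1)
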
Esempio n. 18
0
def validate_without_gt(args, val_loader, depth_net, pose_net, epoch, logger,
                        tb_writer, sample_nb_to_log, **env):
    global device
    batch_time = AverageMeter()
    losses = AverageMeter(i=3, precision=4)
    w1, w2, w3 = args.photo_loss_weight, args.smooth_loss_weight, args.ssim
    if args.log_output:
        poses_values = np.zeros(((len(val_loader) - 1) * args.test_batch_size *
                                 (args.sequence_length - 1), 6))
        disp_values = np.zeros(
            ((len(val_loader) - 1) * args.test_batch_size * 3))

    # switch to evaluate mode
    depth_net.eval()
    pose_net.eval()

    end = time.time()
    logger.valid_bar.update(0)

    for i, sample in enumerate(val_loader):
        log_output = i < sample_nb_to_log

        imgs = torch.stack(sample['imgs'], dim=1).to(device)
        intrinsics = sample['intrinsics'].to(device)

        if epoch == 1 and log_output:
            for j, img in enumerate(sample['imgs']):
                tb_writer.add_image('val Input/{}'.format(i),
                                    tensor2array(img[0]), j)

        batch_size, seq = imgs.size()[:2]
        poses = pose_net(imgs)
        pose_matrices = pose_vec2mat(poses,
                                     args.rotation_mode)  # [B, seq, 3, 4]

        mid_index = (args.sequence_length - 1) // 2

        tgt_imgs = imgs[:, mid_index]  # [B, 3, H, W]
        tgt_poses = pose_matrices[:, mid_index]  # [B, 3, 4]
        compensated_poses = compensate_pose(
            pose_matrices,
            tgt_poses)  # [B, seq, 3, 4] tgt_poses are now neutral pose

        ref_ids = list(range(args.sequence_length))
        ref_ids.remove(mid_index)

        loss_1 = 0
        loss_2 = 0

        for ref_index in ref_ids:
            prior_imgs = imgs[:, ref_index]
            prior_poses = compensated_poses[:, ref_index]  # [B, 3, 4]

            prior_imgs_compensated = inverse_rotate(prior_imgs,
                                                    prior_poses[:, :, :3],
                                                    intrinsics)
            input_pair = torch.cat([prior_imgs_compensated, tgt_imgs],
                                   dim=1)  # [B, 6, W, H]

            predicted_magnitude = prior_poses[:, :, -1:].norm(
                p=2, dim=1, keepdim=True).unsqueeze(1)  # [B, 1, 1, 1]
            scale_factor = args.nominal_displacement / predicted_magnitude
            normalized_translation = compensated_poses[:, :, :,
                                                       -1:] * scale_factor  # [B, seq, 3, 1]
            new_pose_matrices = torch.cat(
                [compensated_poses[:, :, :, :-1], normalized_translation],
                dim=-1)

            depth = depth_net(input_pair)
            disparity = 1 / depth

            tgt_id = torch.full((batch_size, ),
                                ref_index,
                                dtype=torch.int64,
                                device=device)
            ref_ids_tensor = torch.tensor(ref_ids,
                                          dtype=torch.int64,
                                          device=device).expand(
                                              batch_size, -1)
            photo_loss, *to_log = photometric_reconstruction_loss(
                imgs,
                tgt_id,
                ref_ids_tensor,
                depth,
                new_pose_matrices,
                intrinsics,
                args.rotation_mode,
                ssim_weight=w3,
                upsample=args.upscale)

            loss_1 += photo_loss

            if log_output:
                log_output_tensorboard(tb_writer, "train", i, ref_index, epoch,
                                       depth[0], disparity[0], *to_log)

            loss_2 += grad_diffusion_loss(disparity, tgt_imgs, args.kappa)

        if args.log_output and i < len(val_loader) - 1:
            step = args.test_batch_size * (args.sequence_length - 1)
            poses_values[i * step:(i + 1) * step] = poses[:, :-1].cpu().view(
                -1, 6).numpy()
            step = args.test_batch_size * 3
            disp_unraveled = disparity.cpu().view(args.test_batch_size, -1)
            disp_values[i * step:(i + 1) * step] = torch.cat([
                disp_unraveled.min(-1)[0],
                disp_unraveled.median(-1)[0],
                disp_unraveled.max(-1)[0]
            ]).numpy()

        loss = w1 * loss_1 + w2 * loss_2
        losses.update([loss.item(), loss_1.item(), loss_2.item()])

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        logger.valid_bar.update(i + 1)
        if i % args.print_freq == 0:
            logger.valid_writer.write('valid: Time {} Loss {}'.format(
                batch_time, losses))

    if args.log_output:
        rot_coeffs = ['rx', 'ry', 'rz'] if args.rotation_mode == 'euler' else [
            'qx', 'qy', 'qz'
        ]
        tr_coeffs = ['tx', 'ty', 'tz']
        for k, (coeff_name) in enumerate(tr_coeffs + rot_coeffs):
            tb_writer.add_histogram('val poses_{}'.format(coeff_name),
                                    poses_values[:, k], epoch)
        tb_writer.add_histogram('disp_values', disp_values, epoch)
    logger.valid_bar.update(len(val_loader))
    return OrderedDict(
        zip(['Total loss', 'Photo loss', 'Smooth loss'], losses.avg))
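
compensate_pose is imported from elsewhere in this project; judging from its use here (the target poses become the identity afterwards), a plausible implementation left-composes every pose of the sequence with the inverse of the reference pose:

import torch

def compensate_pose(pose_matrices, ref_poses):
    """Re-express [B, seq, 3, 4] poses relative to ref_poses [B, 3, 4],
    so that the reference pose becomes the identity."""
    inv_rot = ref_poses[:, :, :3].transpose(-2, -1).unsqueeze(1)   # [B, 1, 3, 3]
    inv_t = -inv_rot @ ref_poses[:, :, -1:].unsqueeze(1)           # [B, 1, 3, 1]
    rot = inv_rot @ pose_matrices[..., :3]                         # [B, seq, 3, 3]
    t = inv_rot @ pose_matrices[..., -1:] + inv_t                  # [B, seq, 3, 1]
    return torch.cat([rot, t], dim=-1)                             # [B, seq, 3, 4]
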
Esempio n. 19
0
def train_one_epoch(args, train_loader, depth_net, pose_net, optimizer, epoch,
                    n_iter, logger, tb_writer, **env):
    global device
    logger.reset_train_bar()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter(precision=4)
    w1, w2, w3 = args.photo_loss_weight, args.smooth_loss_weight, args.ssim
    e1, e2 = args.training_milestones

    # switch to train mode
    depth_net.train()
    pose_net.train()

    end = time.time()
    logger.train_bar.update(0)

    for i, sample in enumerate(train_loader):

        log_losses = i > 0 and n_iter % args.print_freq == 0
        log_output = args.training_output_freq > 0 and n_iter % args.training_output_freq == 0

        # measure data loading time
        data_time.update(time.time() - end)
        imgs = torch.stack(sample['imgs'], dim=1).to(device)
        intrinsics = sample['intrinsics'].to(device)

        batch_size, seq = imgs.size()[:2]

        if args.network_input_size is not None:
            h, w = args.network_input_size
            downsample_imgs = F.interpolate(imgs, (3, h, w), mode='area')
            poses = pose_net(downsample_imgs)  # [B, seq, 6]
        else:
            poses = pose_net(imgs)

        pose_matrices = pose_vec2mat(poses,
                                     args.rotation_mode)  # [B, seq, 3, 4]

        total_indices = torch.arange(seq, dtype=torch.int64,
                                     device=device).expand(batch_size, seq)
        batch_range = torch.arange(batch_size,
                                   dtype=torch.int64,
                                   device=device)
        '''For each element of the batch, select a random picture in the sequence for which
        we will compute the depth; all poses are then converted so that the pose of this very
        picture is exactly the identity. At first this image is always the middle of the sequence.'''

        if epoch > e2:
            tgt_id = torch.randint(0, seq, (batch_size, ), device=device)
        else:
            tgt_id = torch.full_like(batch_range, args.sequence_length // 2)

        ref_ids = total_indices[total_indices != tgt_id.unsqueeze(1)].view(
            batch_size, seq - 1)
        '''
        Select which other picture we are going to feed to DepthNet; it must not be the same
        as tgt_id. At first it is always the first picture of the sequence; it is chosen
        randomly once the first training milestone is reached.
        '''

        if epoch > e1:
            probs = torch.ones_like(total_indices, dtype=torch.float32)
            probs[batch_range, tgt_id] = args.same_ratio
            prior_id = torch.multinomial(probs, 1)[:, 0]
        else:
            prior_id = torch.zeros_like(batch_range)

        # Handle the case prior_id == tgt_id, where the depth must be max_depth regardless of apparent movement

        tgt_imgs = imgs[batch_range, tgt_id]  # [B, 3, H, W]
        tgt_poses = pose_matrices[batch_range, tgt_id]  # [B, 3, 4]

        prior_imgs = imgs[batch_range, prior_id]

        compensated_poses = compensate_pose(
            pose_matrices,
            tgt_poses)  # [B, seq, 3, 4] tgt_poses are now neutral pose
        prior_poses = compensated_poses[batch_range, prior_id]  # [B, 3, 4]

        if args.supervise_pose:
            from_GT = invert_mat(sample['pose']).to(device)
            compensated_GT_poses = compensate_pose(
                from_GT, from_GT[batch_range, tgt_id])
            prior_GT_poses = compensated_GT_poses[batch_range, prior_id]
            prior_imgs_compensated = inverse_rotate(prior_imgs,
                                                    prior_GT_poses[:, :, :-1],
                                                    intrinsics)
        else:
            prior_imgs_compensated = inverse_rotate(prior_imgs,
                                                    prior_poses[:, :, :-1],
                                                    intrinsics)

        input_pair = torch.cat([prior_imgs_compensated, tgt_imgs],
                               dim=1)  # [B, 6, W, H]
        depth = depth_net(input_pair)

        # depth = [sample['depth'].to(device).unsqueeze(1) * 3 / abs(tgt_id[0] - prior_id[0])]
        # depth.append(torch.nn.functional.interpolate(depth[0], scale_factor=2))
        disparities = [1 / d for d in depth]

        predicted_magnitude = prior_poses[:, :,
                                          -1:].norm(p=2, dim=1,
                                                    keepdim=True).unsqueeze(1)
        scale_factor = args.nominal_displacement / (predicted_magnitude + 1e-5)
        normalized_translation = compensated_poses[:, :, :,
                                                   -1:] * scale_factor  # [B, seq_length-1, 3]
        new_pose_matrices = torch.cat(
            [compensated_poses[:, :, :, :-1], normalized_translation], dim=-1)

        biggest_scale = depth[0].size(-1)

        # Construct valid sequence to compute photometric error,
        # make the rest converge to max_depth because nothing moved
        vb = batch_range[prior_id != tgt_id]
        same_range = batch_range[prior_id == tgt_id]  # batch of still pairs

        loss_1 = 0
        loss_1_same = 0
        for k, scaled_depth in enumerate(depth):
            size_ratio = scaled_depth.size(-1) / biggest_scale

            if len(same_range) > 0:
                # Frames are identical. The corresponding depth must be infinite. Here, we set it to max depth
                still_depth = scaled_depth[same_range]
                loss_same = F.smooth_l1_loss(still_depth / args.max_depth,
                                             torch.ones_like(still_depth))
            else:
                loss_same = 0

            loss_valid, *to_log = photometric_reconstruction_loss(
                imgs[vb],
                tgt_id[vb],
                ref_ids[vb],
                scaled_depth[vb],
                new_pose_matrices[vb],
                intrinsics[vb],
                args.rotation_mode,
                ssim_weight=w3,
                upsample=args.upscale)

            loss_1 += loss_valid * size_ratio
            loss_1_same += loss_same * size_ratio

            if log_output and len(vb) > 0:
                log_output_tensorboard(tb_writer, "train", 0, k, n_iter,
                                       scaled_depth[0], disparities[k][0],
                                       *to_log)
        loss_2 = grad_diffusion_loss(disparities, tgt_imgs, args.kappa)

        loss = w1 * (loss_1 + loss_1_same) + w2 * loss_2
        if args.supervise_pose:
            loss += (from_GT[:, :, :, :3] -
                     pose_matrices[:, :, :, :3]).abs().mean()

        if log_losses:
            tb_writer.add_scalar('photometric_error', loss_1.item(), n_iter)
            tb_writer.add_scalar('disparity_smoothness_loss', loss_2.item(),
                                 n_iter)
            tb_writer.add_scalar('total_loss', loss.item(), n_iter)

        if log_output and len(vb) > 0:
            valid_poses = poses[vb]
            nominal_translation_magnitude = valid_poses[:, -2, :3].norm(p=2,
                                                                        dim=-1)
            # Log the translation magnitude relative to translation magnitude between last and penultimate frames
            # for a perfectly constant displacement magnitude, you should get ratio of 2,3,4 and so forth.
            # last pose is always identity and penultimate translation magnitude is always 1, so you don't need to log them
            for j in range(args.sequence_length - 2):
                trans_mag = valid_poses[:, j, :3].norm(p=2, dim=-1)
                tb_writer.add_histogram(
                    'tr {}'.format(j),
                    (trans_mag /
                     nominal_translation_magnitude).detach().cpu().numpy(),
                    n_iter)
            for j in range(args.sequence_length - 1):
                # TODO log a better value : this is magnitude of vector (yaw, pitch, roll) which is not a physical value
                rot_mag = valid_poses[:, j, 3:].norm(p=2, dim=-1)
                tb_writer.add_histogram('rot {}'.format(j),
                                        rot_mag.detach().cpu().numpy(), n_iter)

            tb_writer.add_image('train Input', tensor2array(tgt_imgs[0]),
                                n_iter)

        # record loss for average meter
        losses.update(loss.item(), args.batch_size)

        # compute gradient and do Adam step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        with open(args.save_path / args.log_full, 'a') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t')
            writer.writerow([loss.item(), loss_1.item(), loss_2.item()])
        logger.train_bar.update(i + 1)
        if i % args.print_freq == 0:
            logger.train_writer.write('Train: Time {} Data {} Loss {}'.format(
                batch_time, data_time, losses))
        if i >= args.epoch_size - 1:
            break

        n_iter += 1

    return losses.avg[0], n_iter
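
Both this training loop and the validation loop above remove the global scale ambiguity by rescaling every translation so that the prior-to-target displacement has a fixed nominal magnitude. A condensed sketch of that step (the function name is ours), assuming compensated_poses is [B, seq, 3, 4] and prior_poses is [B, 3, 4]:

import torch

def normalize_translations(compensated_poses, prior_poses, nominal_displacement, eps=1e-5):
    """Scale all translations so the prior->target displacement equals nominal_displacement."""
    magnitude = prior_poses[:, :, -1:].norm(p=2, dim=1, keepdim=True).unsqueeze(1)  # [B, 1, 1, 1]
    scale = nominal_displacement / (magnitude + eps)
    translations = compensated_poses[:, :, :, -1:] * scale
    return torch.cat([compensated_poses[:, :, :, :-1], translations], dim=-1)
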
Esempio n. 20
0
def main():
    args = parser.parse_args()

    limit_1 = 8
    limit_2 = 28

    sequences = os.listdir(args.dataset_dir)

    for seq in sequences:
        if '.txt' not in seq:
            print(seq)
            args.sequence = seq

            device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

            image_dir = Path(args.dataset_dir + args.sequence)

            test_files = sum([image_dir.files('*.{}'.format(ext))
                              for ext in args.img_exts], [])
            test_files.sort()
            print('{} files to test'.format(len(test_files)))

            camera_matrix = np.loadtxt(Path(args.dataset_dir + args.sequence + "/cam.txt")).astype(np.float32)
            # print(camera_matrix)
            rvec = np.array([0., 0., 0])
            tvec = np.array([0., 0., 0])
            rvec, _ = cv2.Rodrigues(rvec)
            NUM_SHOW_POINTS = 70

            # homogeneous point [x, y, z, w] corresponds to the three-dimensional point [x/w, y/w, z/w].
            project_points = np.array([[0, 1.7, 3, 1]]).reshape(1, 1, 4)
            # homogeneous point [x, y, z, w] corresponds to the three-dimensional point [x/w, y/w, z/w].
            project_points_l = np.array([[-0.8, 1.7, 3, 1]]).reshape(1, 1, 4)
            # homogeneous point [x, y, z, w] corresponds to the three-dimensional point [x/w, y/w, z/w].
            project_points_r = np.array([[+0.8, 1.7, 3, 1]]).reshape(1, 1, 4)


            path = args.dataset_dir + args.sequence + "/frame{0:06d}.png"

            df = pd.read_csv(args.dataset_dir.replace('_frames', '') + 'info/{}-0-static-deleted.csv'.format(seq), sep=",")
            print(df)

            global_pose = np.identity(4)
            poses = [global_pose[0:3, :].reshape(1, 12)]

            for i in range(len(df)):
                v = df['linear_speed'][i]
                dt = 0.1
                r = get_radius(df['real_steer_angle'][i] / WHEEL_STEER_RATIO)
                alpha = v * dt / r
                rot = [0, alpha, 0]
                px, py = rotate_point(0, 0, alpha, -r, 0)
                trans = [px + r, 0, py]
                pose = torch.tensor(trans + rot).reshape(1, 6)
                pose_mat = pose_vec2mat(pose).squeeze(0).cpu().numpy()
                pose_mat = np.vstack([pose_mat, np.array([0, 0, 0, 1])])
                global_pose = global_pose @ np.linalg.inv(pose_mat)
                poses.append(global_pose[0:3, :].reshape(1, 12))

            n = len(poses)
            poses = np.array(poses).reshape(n, 3, 4)

            x = np.zeros((n, 1, 4))
            x[:, :, -1] = 1
            poses = np.concatenate([poses, x], axis=1)
                

            for i in tqdm(range(n - 1)):
                crt_pose = np.stack([inv(poses[i]).dot(x) for x in poses[i:]])

                world_points = project_points.dot(crt_pose.transpose((0, 2, 1)))[0, 0]
                world_points_l = project_points_l.dot(crt_pose.transpose((0, 2, 1)))[0, 0]
                world_points_r = project_points_r.dot(crt_pose.transpose((0, 2, 1)))[0, 0]

                show_img = cv2.imread(path.format(i)).astype(np.float32) / 255.

                world_points_show = np.concatenate([
                    world_points[:NUM_SHOW_POINTS][:, :3],
                    world_points_l[:NUM_SHOW_POINTS][:, :3],
                    world_points_r[:NUM_SHOW_POINTS][:, :3]
                ])

                rvec2 = np.eye(3)  # identity rotation (no camera rotation)
                show_points = cv2.projectPoints(world_points_show.astype(np.float64), rvec2, tvec,
                                                camera_matrix, None)[0]
                show_points_l = cv2.projectPoints(world_points_l[:NUM_SHOW_POINTS][:, :3].astype(np.float64), rvec2, tvec,
                                                  camera_matrix, None)[0]
                show_points_r = cv2.projectPoints(world_points_r[:NUM_SHOW_POINTS][:, :3].astype(np.float64), rvec2, tvec,
                                                  camera_matrix, None)[0]
                show_points = show_points.astype(int)[:, 0]
                show_points_l = show_points_l.astype(int)[:, 0]
                show_points_r = show_points_r.astype(int)[:, 0]
                overlay = np.zeros_like(show_img)
                overlay_limited = np.zeros_like(show_img)
                # overlay[:, :, 0] = 255

                ok = True

                # cv2.imshow('img', show_img)  # distances / distances.max())
                # cv2.waitKey(0)

                for it, p1, p2, p3, p4 in zip(range(len(show_points_l) - 1), show_points_l[:-1], show_points_r[:-1],
                                              show_points_l[1:], show_points_r[1:]):
                    x1, y1 = p1
                    x2, y2 = p2
                    x3, y3 = p3
                    x4, y4 = p4
                    pts = np.array([(x1, y1), (x3, y3), (x4, y4), (x2, y2)])

                    overlay = cv2.drawContours(overlay, [pts], 0, (0, 255, 0), cv2.FILLED)

                alpha = 1.0
                show_img_og = np.copy(show_img)
                # show_img = cv2.addWeighted(overlay_limited, alpha, show_img, 1, 0)
                show_img_og = cv2.addWeighted(overlay, alpha, show_img_og, 1, 0)

                if np.sum(overlay) > 0.0:
                    pass
                    # cv2.imwrite(args.segmentation_path + "labels/GTLabels/" + path.format(i).replace('/', '\\'), distances)

                cv2.imshow('res', show_img_og)
                cv2.waitKey(0)
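
get_radius, rotate_point and WHEEL_STEER_RATIO come from elsewhere; the loop above integrates a simple bicycle / constant-turn-rate model, deriving a turn radius from the road-wheel angle and rotating by alpha = v * dt / r each step. Plausible helpers, purely illustrative and with an assumed wheelbase, steering ratio and argument order inferred from the call site, could look like this:

import math

WHEELBASE = 2.7            # assumed wheelbase in metres (illustrative only)
WHEEL_STEER_RATIO = 16.0   # assumed steering-wheel to road-wheel ratio (illustrative only)

def get_radius(steer_angle):
    """Turning radius of a bicycle model for a road-wheel angle in radians."""
    return WHEELBASE / math.tan(steer_angle + 1e-9)

def rotate_point(cx, cy, angle, x, y):
    """Rotate the point (x, y) around the centre (cx, cy) by angle radians."""
    dx, dy = x - cx, y - cy
    return (cx + dx * math.cos(angle) - dy * math.sin(angle),
            cy + dx * math.sin(angle) + dy * math.cos(angle))
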
Esempio n. 21
0
def main():
    args = parser.parse_args()
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    if args.gt_type == 'KITTI':
        from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework
    elif args.gt_type == 'stillbox':
        from stillbox_eval.pose_evaluation_utils import test_framework_stillbox as test_framework

    weights = torch.load(args.pretrained_posenet)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1)/3)
    pose_net = PoseNet(seq_length=seq_length).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))

    for j, sample in enumerate(tqdm(framework)):
        imgs = sample['imgs']

        h,w,_ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in imgs]

        imgs = [torch.from_numpy(np.transpose(img, (2,0,1))) for img in imgs]
        imgs = torch.stack(imgs).unsqueeze(0).to(device)
        imgs = 2*(imgs/255 - 0.5)

        poses = pose_net(imgs)

        inv_transform_matrices = pose_vec2mat(poses, rotation_mode=args.rotation_mode)

        transform_matrices = invert_mat(inv_transform_matrices)

        # rot_matrices = np.linalg.inv(inv_transform_matrices[:,:,:3])
        # tr_vectors = rot_matrices @ inv_transform_matrices[:,:,-1:]

        # transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1)

        # first_transform = transform_matrices[0]
        # final_poses = np.linalg.inv(first_transform[:,:3]) @ transform_matrices
        # final_poses[:,:,-1:] -= np.linalg.inv(first_transform[:,:3]) @ first_transform[:,-1:]

        final_poses = compensate_pose(transform_matrices, transform_matrices[:,0])[0].cpu().numpy()

        if args.output_dir is not None:
            predictions_array[j] = final_poses

        ATE, RE = compute_pose_error(sample['poses'][1:], final_poses[1:])
        errors[j] = ATE, RE

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE','RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

    if args.output_dir is not None:
        np.save(output_dir/'predictions.npy', predictions_array)
Esempio n. 22
0
def main():
    args = parser.parse_args()
    from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework

    weights = torch.load(args.pretrained_posenet)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1,
                          output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))

    for j, sample in enumerate(tqdm(framework)):
        imgs = sample['imgs']

        h, w, _ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height
                                     or w != args.img_width):
            imgs = [
                imresize(img,
                         (args.img_height, args.img_width)).astype(np.float32)
                for img in imgs
            ]

        imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

        ref_imgs = []
        for i, img in enumerate(imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.5).to(device)
            if i == len(imgs) // 2:
                tgt_img = img
            else:
                ref_imgs.append(img)

        _, poses = pose_net(tgt_img, ref_imgs)

        poses = poses.cpu()[0]
        poses = torch.cat([
            poses[:len(imgs) // 2],
            torch.zeros(1, 6).float(), poses[len(imgs) // 2:]
        ])

        inv_transform_matrices = pose_vec2mat(
            poses, rotation_mode=args.rotation_mode).numpy().astype(np.float64)

        rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3])
        tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]

        transform_matrices = np.concatenate([rot_matrices, tr_vectors],
                                            axis=-1)

        first_inv_transform = inv_transform_matrices[0]
        final_poses = first_inv_transform[:, :3] @ transform_matrices
        final_poses[:, :, -1:] += first_inv_transform[:, -1:]

        if args.output_dir is not None:
            predictions_array[j] = final_poses

        ATE, RE = compute_pose_error(sample['poses'], final_poses)
        errors[j] = ATE, RE

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE', 'RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

    if args.output_dir is not None:
        np.save(output_dir / 'predictions.npy', predictions_array)
Esempio n. 23
0
def main():
    args = parser.parse_args()
    from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework
    # net init
    weights = torch.load(args.pretrained_posenet)  # load the checkpoint, returns an OrderedDict

    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1)/3)
    # conv1.0.weight has shape (16, 15, 7, 7): the layer is defined with
    # in_planes = 15, out_planes = 16, kernel_size = 7, but the state_dict stores the
    # weight as (out_planes, in_planes, k, k), so the first two dims look swapped.

    # seq_length == 5 because that is what the model was trained with
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)  # load the model parameters

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))
# main loop
    for j, sample in enumerate(tqdm(framework)):  # j from 0 to 1591; tqdm(obj) calls __iter__
        if j > 100:
            break
        imgs = sample['imgs']  # [375, 1242, 3]

        h,w,_ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in imgs] #[128,416,3]

        imgs = [np.transpose(img, (2,0,1)) for img in imgs]  # [3, 128, 416], channels moved to the front

        ref_imgs = []
        for i, img in enumerate(imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img/255 - 0.5)/0.5).to(device)
            if i == len(imgs)//2:
                tgt_img = img
            else:
                ref_imgs.append(img)
    # pose prediction
        # tgt_img size: [1, 3, h, w]
        # ref_imgs: list of [1, 3, h, w] tensors, length seq_length - 1 (4 here)
        _, poses = pose_net(tgt_img, ref_imgs)  # returns exp_mask, pose
        # the leading 1 is the batch dimension required during training
        # pose tensor size = (1, num_ref_imgs (4), 6)

        poses = poses.cpu()[0]  # (4, 6)
        poses = torch.cat([poses[:len(imgs)//2], torch.zeros(1,6).float(), poses[len(imgs)//2:]])  # insert all zeros in the middle for the keyframe,
        # whose motion relative to itself is zero, giving [5, 6]

        inv_transform_matrices = pose_vec2mat(poses, rotation_mode=args.rotation_mode).numpy().astype(np.float64)
        #shape = 5,3,4

        rot_matrices = np.linalg.inv(inv_transform_matrices[:,:,:3])
        tr_vectors = -rot_matrices @ inv_transform_matrices[:,:,-1:]

        transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1)

        first_inv_transform = inv_transform_matrices[0]
        final_poses = first_inv_transform[:,:3] @ transform_matrices
        final_poses[:,:,-1:] += first_inv_transform[:,-1:]#5,3,4



        if args.output_dir is not None:  # record the forward-pass result to save later
            predictions_array[j] = final_poses

        ATE, RE = compute_pose_error(sample['poses'], final_poses)
        errors[j] = ATE, RE

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE','RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

    if args.output_dir is not None:
        np.save(output_dir/'predictions.npy', predictions_array)
Esempio n. 24
0
def validate_with_gt_pose(args,
                          val_loader,
                          disp_net,
                          pose_exp_net,
                          epoch,
                          logger,
                          tb_writer,
                          sample_nb_to_log=3):
    global device
    batch_time = AverageMeter()
    depth_error_names = ['abs_diff', 'abs_rel', 'sq_rel', 'a1', 'a2', 'a3']
    depth_errors = AverageMeter(i=len(depth_error_names), precision=4)
    pose_error_names = ['ATE', 'RTE']
    pose_errors = AverageMeter(i=2, precision=4)
    log_outputs = sample_nb_to_log > 0
    # Output the logs throughout the whole dataset
    batches_to_log = list(
        np.linspace(0, len(val_loader), sample_nb_to_log).astype(int))
    poses_values = np.zeros(
        ((len(val_loader) - 1) * args.batch_size * (args.sequence_length - 1),
         6))
    disp_values = np.zeros(((len(val_loader) - 1) * args.batch_size * 3))

    # switch to evaluate mode
    disp_net.eval()
    pose_exp_net.eval()

    end = time.time()
    logger.valid_bar.update(0)
    for i, (tgt_img, ref_imgs, gt_depth, gt_poses) in enumerate(val_loader):
        tgt_img = tgt_img.to(device)
        gt_depth = gt_depth.to(device)
        gt_poses = gt_poses.to(device)
        ref_imgs = [img.to(device) for img in ref_imgs]
        b = tgt_img.shape[0]

        # compute output
        output_disp = disp_net(tgt_img)
        output_depth = 1 / output_disp
        explainability_mask, output_poses = pose_exp_net(tgt_img, ref_imgs)

        reordered_output_poses = torch.cat([
            output_poses[:, :gt_poses.shape[1] // 2],
            torch.zeros(b, 1, 6).to(output_poses),
            output_poses[:, gt_poses.shape[1] // 2:]
        ],
                                           dim=1)

        # pose_vec2mat only takes B, 6 tensors, so we simulate a batch dimension of B * seq_length
        unravelled_poses = reordered_output_poses.reshape(-1, 6)
        unravelled_matrices = pose_vec2mat(unravelled_poses,
                                           rotation_mode=args.rotation_mode)
        inv_transform_matrices = unravelled_matrices.reshape(b, -1, 3, 4)

        rot_matrices = inv_transform_matrices[..., :3].transpose(-2, -1)
        tr_vectors = -rot_matrices @ inv_transform_matrices[..., -1:]

        transform_matrices = torch.cat([rot_matrices, tr_vectors], axis=-1)

        first_inv_transform = inv_transform_matrices.reshape(b, -1, 3,
                                                             4)[:, :1]
        final_poses = first_inv_transform[..., :3] @ transform_matrices
        final_poses[..., -1:] += first_inv_transform[..., -1:]
        final_poses = final_poses.reshape(b, -1, 3, 4)

        if log_outputs and i in batches_to_log:  # log first output of wanted batches
            index = batches_to_log.index(i)
            if epoch == 0:
                for j, ref in enumerate(ref_imgs):
                    tb_writer.add_image('val Input {}/{}'.format(j, index),
                                        tensor2array(tgt_img[0]), 0)
                    tb_writer.add_image('val Input {}/{}'.format(j, index),
                                        tensor2array(ref[0]), 1)

            log_output_tensorboard(tb_writer, 'val', index, '', epoch,
                                   output_depth, output_disp, None, None,
                                   explainability_mask)

        if log_outputs and i < len(val_loader) - 1:
            step = args.batch_size * (args.sequence_length - 1)
            poses_values[i * step:(i + 1) * step] = output_poses.cpu().view(
                -1, 6).numpy()
            step = args.batch_size * 3
            disp_unraveled = output_disp.cpu().view(args.batch_size, -1)
            disp_values[i * step:(i + 1) * step] = torch.cat([
                disp_unraveled.min(-1)[0],
                disp_unraveled.median(-1)[0],
                disp_unraveled.max(-1)[0]
            ]).numpy()

        depth_errors.update(compute_depth_errors(gt_depth, output_depth[:, 0]))
        pose_errors.update(compute_pose_errors(gt_poses, final_poses))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        logger.valid_bar.update(i + 1)
        if i % args.print_freq == 0:
            logger.valid_writer.write(
                'valid: Time {} Abs Error {:.4f} ({:.4f}), ATE {:.4f} ({:.4f})'
                .format(batch_time, depth_errors.val[0], depth_errors.avg[0],
                        pose_errors.val[0], pose_errors.avg[0]))
    if log_outputs:
        prefix = 'valid poses'
        coeffs_names = ['tx', 'ty', 'tz']
        if args.rotation_mode == 'euler':
            coeffs_names.extend(['rx', 'ry', 'rz'])
        elif args.rotation_mode == 'quat':
            coeffs_names.extend(['qx', 'qy', 'qz'])
        for i in range(poses_values.shape[1]):
            tb_writer.add_histogram('{} {}'.format(prefix, coeffs_names[i]),
                                    poses_values[:, i], epoch)
        tb_writer.add_histogram('disp_values', disp_values, epoch)
    logger.valid_bar.update(len(val_loader))
    return depth_errors.avg + pose_errors.avg, depth_error_names + pose_error_names
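
Unlike the NumPy evaluation scripts above, this validation routine inverts the batched rotations with a plain transpose, which is valid because the 3x3 blocks produced by pose_vec2mat are orthonormal rotation matrices (R^-1 = R^T). A quick numerical check of that identity on random rotations:

import torch

aa = torch.randn(4, 3)                 # random axis-angle vectors
hat = torch.zeros(4, 3, 3)             # their skew-symmetric (hat) matrices
hat[:, 0, 1], hat[:, 0, 2] = -aa[:, 2], aa[:, 1]
hat[:, 1, 0], hat[:, 1, 2] = aa[:, 2], -aa[:, 0]
hat[:, 2, 0], hat[:, 2, 1] = -aa[:, 1], aa[:, 0]
R = torch.matrix_exp(hat)              # [4, 3, 3] rotation matrices (Rodrigues formula)
assert torch.allclose(R.transpose(-2, -1) @ R, torch.eye(3).expand(4, 3, 3), atol=1e-4)
assert torch.allclose(R.transpose(-2, -1), torch.inverse(R), atol=1e-4)
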
Esempio n. 25
0
def validate_without_gt(args,
                        val_loader,
                        disp_net,
                        pose_exp_net,
                        epoch,
                        logger,
                        tb_writer,
                        sample_nb_to_log=3):
    global device
    batch_time = AverageMeter()
    losses = AverageMeter(i=3, precision=4)
    log_outputs = sample_nb_to_log > 0
    w1, w2, w3 = args.photo_loss_weight, args.mask_loss_weight, args.smooth_loss_weight
    poses = np.zeros(
        ((len(val_loader) - 1) * args.batch_size * (args.sequence_length - 1),
         6))
    disp_values = np.zeros(((len(val_loader) - 1) * args.batch_size * 3))

    # switch to evaluate mode
    disp_net.eval()
    pose_exp_net.eval()

    end = time.time()
    logger.valid_bar.update(0)
    for i, (tgt_img, ref_imgs, intrinsics,
            intrinsics_inv) in enumerate(val_loader):
        tgt_img = tgt_img.to(device)
        ref_imgs = [img.to(device) for img in ref_imgs]
        intrinsics = intrinsics.to(device)
        intrinsics_inv = intrinsics_inv.to(device)

        # compute output
        disp = disp_net(tgt_img)
        depth = 1 / disp
        explainability_mask, pose = pose_exp_net(tgt_img, ref_imgs)

        loss_1, warped, diff = photometric_reconstruction_loss(
            tgt_img, ref_imgs, intrinsics, depth, explainability_mask, pose,
            args.rotation_mode, args.padding_mode)
        loss_1 = loss_1.item()
        if w2 > 0:
            loss_2 = explainability_loss(explainability_mask).item()
        else:
            loss_2 = 0
        loss_3 = smooth_loss(depth).item()

        if log_outputs and i < sample_nb_to_log - 1:  # log first output of first batches
            if epoch == 0:
                for j, ref in enumerate(ref_imgs):
                    tb_writer.add_image('val Input {}/{}'.format(j, i),
                                        tensor2array(tgt_img[0]), 0)
                    tb_writer.add_image('val Input {}/{}'.format(j, i),
                                        tensor2array(ref[0]), 1)

            log_output_tensorboard(tb_writer, 'val', i, '', epoch, 1. / disp,
                                   disp, warped[0], diff[0],
                                   explainability_mask)

        if log_outputs and i < len(val_loader) - 1:
            step = args.batch_size * (args.sequence_length - 1)
            poses[i * step:(i + 1) * step] = pose.cpu().view(-1, 6).numpy()
            step = args.batch_size * 3
            disp_unraveled = disp.cpu().view(args.batch_size, -1)
            disp_values[i * step:(i + 1) * step] = torch.cat([
                disp_unraveled.min(-1)[0],
                disp_unraveled.median(-1)[0],
                disp_unraveled.max(-1)[0]
            ]).numpy()

        loss = w1 * loss_1 + w2 * loss_2 + w3 * loss_3

        if args.with_photocon_loss:
            batch_size = pose.size()[0]
            homo_row = torch.tensor([[0, 0, 0, 1]],
                                    dtype=torch.float).to(device)
            homo_row = homo_row.unsqueeze(0).expand(batch_size, -1, -1)
            T21 = pose_vec2mat(pose[:, 0])
            T21 = torch.cat((T21, homo_row), 1)
            T12 = torch.inverse(T21)
            T23 = pose_vec2mat(pose[:, 1])
            T23 = torch.cat((T23, homo_row), 1)
            T13 = torch.matmul(T23, T12)  #[B,4,4]
            #             print("----",T13.size())
            # target = 1(ref_imgs[0]) and ref = 3(ref_imgs[1])
            ref_img_warped, valid_points = inverse_warp_posemat(
                ref_imgs[1], depth[:, 0], T13, intrinsics, args.rotation_mode,
                args.padding_mode)
            diff = (ref_imgs[0] -
                    ref_img_warped) * valid_points.unsqueeze(1).float()
            loss_4 = diff.abs().mean()

            loss += loss_4

        losses.update([loss, loss_1, loss_2])

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        logger.valid_bar.update(i + 1)
        if i % args.print_freq == 0:
            logger.valid_writer.write('valid: Time {} Loss {}'.format(
                batch_time, losses))
    if log_outputs:
        prefix = 'valid poses'
        coeffs_names = ['tx', 'ty', 'tz']
        if args.rotation_mode == 'euler':
            coeffs_names.extend(['rx', 'ry', 'rz'])
        elif args.rotation_mode == 'quat':
            coeffs_names.extend(['qx', 'qy', 'qz'])
        for i in range(poses.shape[1]):
            tb_writer.add_histogram('{} {}'.format(prefix, coeffs_names[i]),
                                    poses[:, i], epoch)
        tb_writer.add_histogram('disp_values', disp_values, epoch)
    logger.valid_bar.update(len(val_loader))
    return losses.avg, [
        'Validation Total loss', 'Validation Photo loss', 'Validation Exp loss'
    ]
def main():
    global best_error, worst_error
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    args = parser.parse_args()
    if args.gt_type == 'KITTI':
        from kitti_eval.depth_evaluation_utils import test_framework_KITTI as test_framework
    elif args.gt_type == 'stillbox':
        from stillbox_eval.depth_evaluation_utils import test_framework_stillbox as test_framework

    weights = torch.load(args.pretrained_depthnet)
    depth_net = DepthNet(depth_activation="elu", batch_norm='bn' in weights.keys() and weights['bn']).to(device)

    depth_net.load_state_dict(weights['state_dict'])
    depth_net.eval()

    if args.pretrained_posenet is None:
        args.stabilize_from_GT = True
        print('no PoseNet specified, stab will be done from ground truth')
        seq_length = 5
    else:
        weights = torch.load(args.pretrained_posenet)
        seq_length = int(weights['state_dict']['conv1.0.weight'].size(1)/3)
        pose_net = PoseNet(seq_length=seq_length).to(device)
        pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = list(f.read().splitlines())
    else:
        test_files = [file.relpathto(dataset_dir) for file in sum([dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])]

    framework = test_framework(dataset_dir, test_files, seq_length, args.min_depth, args.max_depth)

    print('{} files to test'.format(len(test_files)))
    errors = np.zeros((7, len(test_files)), np.float32)

    args.output_dir = Path(args.output_dir)
    args.output_dir.makedirs_p()

    for j, sample in enumerate(tqdm(framework)):
        imgs = sample['imgs']
        intrinsics = sample['intrinsics'].copy()

        h,w,_ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in imgs]
            intrinsics[0] *= args.img_width/w
            intrinsics[1] *= args.img_height/h

        intrinsics_inv = np.linalg.inv(intrinsics)

        intrinsics = torch.from_numpy(intrinsics).unsqueeze(0).to(device)
        intrinsics_inv = torch.from_numpy(intrinsics_inv).unsqueeze(0).to(device)
        imgs = [torch.from_numpy(np.transpose(img, (2,0,1))) for img in imgs]
        imgs = torch.stack(imgs).unsqueeze(0).to(device)
        imgs = 2*(imgs/255 - 0.5)

        tgt_img = imgs[:,sample['tgt_index']]

        # Construct a batch of all possible stabilized pairs, with PoseNet or with GT orientation, will take the output closest to target mean depth
        if args.stabilize_from_GT:
            poses_GT = Variable(torch.from_numpy(sample['poses']).cuda()).unsqueeze(0)
            inv_poses_GT = invert_mat(poses_GT)
            tgt_pose = inv_poses_GT[:,sample['tgt_index']]
            inv_transform_matrices_tgt = compensate_pose(inv_poses_GT, tgt_pose)
        else:
            poses = pose_net(imgs)
            inv_transform_matrices = pose_vec2mat(poses, rotation_mode=args.rotation_mode)

            tgt_pose = inv_transform_matrices[:,sample['tgt_index']]
            inv_transform_matrices_tgt = compensate_pose(inv_transform_matrices, tgt_pose)

        stabilized_pairs = []
        corresponding_displ = []
        for i in range(seq_length):
            if i == sample['tgt_index']:
                continue
            img = imgs[:,i]
            img_pose = inv_transform_matrices_tgt[:,i]
            stab_img = inverse_rotate(img, img_pose[:,:,:3], intrinsics, intrinsics_inv)
            pair = torch.cat([stab_img, tgt_img], dim=1)  # [1, 6, H, W]
            stabilized_pairs.append(pair)

            GT_translations = sample['poses'][:,:,-1]
            real_displacement = np.linalg.norm(GT_translations[sample['tgt_index']] - GT_translations[i])
            corresponding_displ.append(real_displacement)
        stab_batch = torch.cat(stabilized_pairs)  # [seq, 6, H, W]
        depth_maps = depth_net(stab_batch)  # [seq, 1 , H/4, W/4]

        selected_depth, selected_index = select_best_map(depth_maps, target_mean_depthnet_output)

        pred_depth = selected_depth.cpu().data.numpy() * corresponding_displ[selected_index] / args.nominal_displacement

        if args.save_output:
            if j == 0:
                predictions = np.zeros((len(test_files), *pred_depth.shape))
            predictions[j] = 1/pred_depth

        gt_depth = sample['gt_depth']
        pred_depth_zoomed = zoom(pred_depth,
                                 (gt_depth.shape[0]/pred_depth.shape[0],
                                  gt_depth.shape[1]/pred_depth.shape[1])
                                 ).clip(args.min_depth, args.max_depth)
        if sample['mask'] is not None:
            pred_depth_zoomed_masked = pred_depth_zoomed[sample['mask']]
            gt_depth = gt_depth[sample['mask']]
        else:
            pred_depth_zoomed_masked = pred_depth_zoomed
        errors[:,j] = compute_errors(gt_depth, pred_depth_zoomed_masked)
        if args.log_best_worst:
            if best_error > errors[0,j]:
                best_error = errors[0,j]
                log_result(pred_depth_zoomed, sample['gt_depth'], stab_batch, selected_index, args.output_dir, 'best')
            if worst_error < errors[0,j]:
                worst_error = errors[0,j]
                log_result(pred_depth_zoomed, sample['gt_depth'], stab_batch, selected_index, args.output_dir, 'worst')

    mean_errors = errors.mean(1)
    error_names = ['abs_rel','sq_rel','rms','log_rms','a1','a2','a3']

    print("Results : ")
    print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
    print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors))

    if args.save_output:
        np.save(args.output_dir/'predictions.npy', predictions)
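
The evaluation above relies on a compute_errors helper that is not included in this snippet. A minimal sketch consistent with the error_names order (abs_rel, sq_rel, rms, log_rms, a1, a2, a3), i.e. the standard monocular-depth metrics, could look like the following; the original helper may differ in details such as value clipping.

import numpy as np

def compute_errors(gt, pred):
    """Standard depth metrics between ground-truth and predicted depth arrays."""
    # threshold accuracies: fraction of pixels where max(pred/gt, gt/pred) < 1.25^k
    thresh = np.maximum(gt / pred, pred / gt)
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()

    # RMSE and RMSE of log depth
    rms = np.sqrt(np.mean((gt - pred) ** 2))
    log_rms = np.sqrt(np.mean((np.log(gt) - np.log(pred)) ** 2))

    # absolute and squared relative error
    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)

    return abs_rel, sq_rel, rms, log_rms, a1, a2, a3
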
    def compose_transforms(self, frames, translation_scale):
        """Chain frame-to-frame PoseNet predictions into global poses, project a fixed
        ground rectangle (wheel track) along the trajectory and overlay it on the first frame."""
        print(self.camera_matrix)
        global_pose = np.identity(4)
        poses = [global_pose[0:3, :].reshape(1, 12)]

        img1 = frames[0]
        tensor_img1 = self.preprocess_img(img1)

        for i in tqdm(range(len(frames) - 1)):
            img2 = frames[i + 1]
            tensor_img2 = self.preprocess_img(img2)

            # predict the 6-DoF relative pose between consecutive frames
            pose = self.pose_net(tensor_img1, tensor_img2)
            # convert the pose vector to a 4x4 homogeneous transform
            pose_mat = pose_vec2mat(pose).squeeze(0).cpu().detach().numpy()
            pose_mat = np.vstack([pose_mat, np.array([0, 0, 0, 1])])
            # chain the inverse relative transform to accumulate the global (camera-to-world) pose
            global_pose = global_pose @ np.linalg.inv(pose_mat)

            poses.append(global_pose[0:3, :].reshape(1, 12))

            # the current frame becomes the reference for the next iteration
            tensor_img1 = tensor_img2

        transforms = np.concatenate(poses, axis=0)

        # reshape the flattened transforms into an (N, 3, 4) array of pose matrices
        pose = np.array(transforms).reshape((len(transforms), 3, 4))

        x = np.zeros((len(pose), 1, 4))
        x[:, :, -1] = 1

        # add the last row to the pose arrays
        pose = np.concatenate([pose, x], axis=1)

        # scale the translations
        pose[:, :, -1] = translation_scale * pose[:, :, -1]

        # this is with the ground truth pose
        # df_gt = pd.read_csv("/HDD1_2TB/storage/KITTI/data_odometry_color/dataset/poses/09.txt", sep=" ", header=None)
        # pose_gt = df_gt.values.reshape((len(df_gt), 3, 4))
        #
        # x_gt = np.zeros((len(pose_gt), 1, 4))
        #
        # x_gt[:, :, -1] = 1
        #
        # pose_gt = np.concatenate([pose_gt, x_gt], axis=1)

        # compute the relative pose between the first pose and every following one
        crt_pose = np.stack([inv(pose[0]).dot(x) for x in pose])

        rvec = np.array([0., 0., 0.])
        tvec = np.array([0., 0., 0.])
        rvec, _ = cv2.Rodrigues(rvec)

        # the _l and _r points correspond to the left and right wheels;
        # each homogeneous point [x, y, z, w] corresponds to the three-dimensional point [x/w, y/w, z/w]
        project_points = np.array([[0, 1.7, 3, 1]]).reshape(1, 1, 4)
        project_points_l = np.array([[-0.8, 1.7, 3, 1]]).reshape(1, 1, 4)
        project_points_r = np.array([[+0.8, 1.7, 3, 1]]).reshape(1, 1, 4)

        world_points = project_points.dot(crt_pose.transpose((0, 2, 1)))[0, 0]
        world_points_l = project_points_l.dot(crt_pose.transpose((0, 2, 1)))[0, 0]
        world_points_r = project_points_r.dot(crt_pose.transpose((0, 2, 1)))[0, 0]

        # the show_* points are the world points projected back into the image plane for display
        world_points_show = np.concatenate([
            world_points[:, :3], world_points_l[:, :3], world_points_r[:, :3]
        ])

        rvec2 = crt_pose[0][:3, :3]  # it is almost the identity matrix
        show_points = cv2.projectPoints(world_points_show.astype(np.float64),
                                        rvec2, tvec, self.camera_matrix,
                                        None)[0]
        show_points_l = cv2.projectPoints(
            world_points_l[:, :3].astype(np.float64), rvec2, tvec,
            self.camera_matrix, None)[0]
        show_points_r = cv2.projectPoints(
            world_points_r[:, :3].astype(np.float64), rvec2, tvec,
            self.camera_matrix, None)[0]

        show_points = show_points.astype(int)[:, 0]
        show_points_l = show_points_l.astype(int)[:, 0]
        show_points_r = show_points_r.astype(int)[:, 0]

        img1 = imresize(frames[0],
                        (self.img_height, self.img_width)).astype(np.float32)
        overlay = np.zeros_like(img1)

        for p1, p2, p3, p4 in zip(show_points_l[:-1], show_points_r[:-1],
                                  show_points_l[1:], show_points_r[1:]):
            x1, y1 = p1
            x2, y2 = p2
            x3, y3 = p3
            x4, y4 = p4
            pts = np.array([(x1, y1), (x3, y3), (x4, y4), (x2, y2)])

            overlay = cv2.drawContours(overlay, [pts], 0, (0, 255, 0),
                                       cv2.FILLED)

        alpha = 1.0
        show_img = cv2.addWeighted(overlay, alpha, img1 / 255.0, 1, 0)

        cv2.imshow('path', show_img)
        cv2.waitKey(0)

        return world_points, world_points_l, world_points_r
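
A hypothetical usage sketch for compose_transforms; the wrapper function and the vo object are assumptions and not part of the snippet above.

import glob
import cv2

def show_wheel_track(vo, frame_dir, translation_scale=1.0):
    """vo is assumed to expose compose_transforms, a trained pose_net and camera_matrix."""
    frame_paths = sorted(glob.glob(frame_dir + '/*.png'))
    frames = [cv2.imread(p) for p in frame_paths]
    # each returned array holds (N, 4) homogeneous world points along the trajectory
    world, world_l, world_r = vo.compose_transforms(frames, translation_scale)
    return world, world_l, world_r
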