Example #1
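The four snippets below appear to come from a DPOD-style 6D pose estimation project and omit their imports. A minimal preamble they seem to assume is sketched here; UNET, LineMODDataset, create_rendering, and load_obj are project-local code, and the module paths used for them below are hypothetical:

import os
import re

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.image as mpimg
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms

# project-local modules; these module paths are hypothetical
import UNET
from dataset_classes import LineMODDataset
from create_renderings import create_rendering
from helper import load_obj
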
def create_refinement_inputs(root_dir, train_eval_dir, classes,
                             intrinsic_matrix):
    correspondence_block = UNET.UNet(n_channels=3,
                                     out_channels_id=14,
                                     out_channels_uv=256,
                                     bilinear=True)
    correspondence_block.cuda()
    correspondence_block_filename = os.path.join(train_eval_dir,
                                                 'correspondence_block.pt')
    correspondence_block.load_state_dict(
        torch.load(correspondence_block_filename,
                   map_location=torch.device('cpu')))
    correspondence_block.eval()  # inference only: fix batch-norm/dropout behavior

    train_data = LineMODDataset(root_dir,
                                classes=classes,
                                transform=transforms.Compose(
                                    [transforms.ToTensor()]))

    upsampled = nn.Upsample(size=[240, 320],
                            mode='bilinear',
                            align_corners=False)
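    # every object crop below is resized to this fixed 240x320 before being
    # written out, so the pose-refinement stage sees uniformly sized inputs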

    regex = re.compile(r'\d+')
    count = 0
    for i in range(len(train_data)):
        if i % 1000 == 0:
            print(str(i) + "/" + str(len(train_data)) + " finished!")
        img_adr, img, _, _, _ = train_data[i]

        label = os.path.split(os.path.split(os.path.dirname(img_adr))[0])[1]
        idx = regex.findall(os.path.split(img_adr)[1])[0]
        adr_rendered = os.path.join(root_dir, label, "pose_refinement",
                                    "rendered", "color" + str(idx) + ".png")
        adr_img = os.path.join(root_dir, label, "pose_refinement",
                               "real", "color" + str(idx) + ".png")
        # find the object in the image using the idmask
        img = img.view(1, img.shape[0], img.shape[1], img.shape[2])
        with torch.no_grad():  # inference only; avoids building the autograd graph
            idmask_pred, _, _ = correspondence_block(img.cuda())
        idmask = torch.argmax(idmask_pred, dim=1).squeeze().cpu()
        coord_2d = (idmask == classes[label]).nonzero(as_tuple=True)
        if coord_2d[0].nelement() != 0:
            coord_2d = torch.cat(
                (coord_2d[0].view(coord_2d[0].shape[0], 1), coord_2d[1].view(
                    coord_2d[1].shape[0], 1)), 1)
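            # axis-aligned bounding box of the predicted mask
            # (dim 0 = image rows, called "x" here; dim 1 = columns, "y")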
            min_x = coord_2d[:, 0].min()
            max_x = coord_2d[:, 0].max()
            min_y = coord_2d[:, 1].min()
            max_y = coord_2d[:, 1].max()
            img = img.squeeze().transpose(1, 2).transpose(0, 2)
            obj_img = img[min_x:max_x + 1, min_y:max_y + 1, :]
            # saving in the correct format using upsampling
            obj_img = obj_img.transpose(0, 1).transpose(0, 2).unsqueeze(dim=0)
            obj_img = upsampled(obj_img)
            obj_img = obj_img.squeeze().transpose(0, 2).transpose(0, 1)

            # obj_img occasionally has values slightly larger than 1.0, so the
            # assumption is that clamping only trims a small amount here.
            # If values were far outside [0, 1], we would need to rescale
            # instead of clamping.
            obj_img = torch.clamp(obj_img, 0.0, 1.0)
            mpimg.imsave(adr_img, obj_img.squeeze().numpy())

            # create rendering for an object
            cropped_rendered_image = create_rendering(root_dir,
                                                      intrinsic_matrix, label,
                                                      idx)
            rendered_img = torch.from_numpy(cropped_rendered_image)
            rendered_img = rendered_img.unsqueeze(dim=0)
            rendered_img = rendered_img.transpose(1, 3).transpose(2, 3)
            rendered_img = upsampled(rendered_img)
            rendered_img = rendered_img.squeeze().transpose(0,
                                                            2).transpose(0, 1)
            mpimg.imsave(adr_rendered, rendered_img.numpy())

        else:  # object not present in idmask prediction
            count += 1
            mpimg.imsave(adr_rendered, np.zeros((240, 320)))
            mpimg.imsave(adr_img, np.zeros((240, 320)))
    print("Number of outliers: ", count)
Example #2
def initial_pose_estimation(root_dir, train_eval_dir, classes,
                            intrinsic_matrix):

    for c in classes:
        class_pred_pose_fname = os.path.join(train_eval_dir, c,
                                             "predicted_pose")
        if not os.path.exists(class_pred_pose_fname):
            os.makedirs(class_pred_pose_fname)

    # LineMOD Dataset
    train_data = LineMODDataset(root_dir,
                                train_eval_dir,
                                classes=classes,
                                transform=transforms.Compose(
                                    [transforms.ToTensor()]))

    # load the best correspondence block weights
    correspondence_block = UNET.UNet(n_channels=3,
                                     out_channels_id=14,
                                     out_channels_uv=256,
                                     bilinear=True)
    correspondence_block.cuda()
    correspondence_block_filename = os.path.join(train_eval_dir,
                                                 'correspondence_block.pt')
    correspondence_block.load_state_dict(
        torch.load(correspondence_block_filename,
                   map_location=torch.device('cpu')))
    correspondence_block.eval()  # inference only: fix batch-norm/dropout behavior

    # initial 6D pose prediction
    regex = re.compile(r'\d+')
    outliers = 0
    for i in range(len(train_data)):
        if i % 100 == 0:
            print(str(i) + "/" + str(len(train_data)) + " finished!")
        img_adr, img, idmask, _, _ = train_data[i]
        label = os.path.split(os.path.split(os.path.dirname(img_adr))[0])[1]
        idx = regex.findall(os.path.split(img_adr)[1])[0]
        img = img.view(1, img.shape[0], img.shape[1], img.shape[2])
        with torch.no_grad():  # inference only; avoids building the autograd graph
            idmask_pred, umask_pred, vmask_pred = correspondence_block(
                img.cuda())
        # argmax over the class dimension collapses each prediction into a
        # (240, 320) per-pixel label map
        temp = torch.argmax(idmask_pred, dim=1).squeeze().cpu()
        upred = torch.argmax(umask_pred, dim=1).squeeze().cpu()
        vpred = torch.argmax(vmask_pred, dim=1).squeeze().cpu()
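        # upred/vpred are the U/V texture coordinates, recovered by argmax
        # over a 256-way per-pixel classification (out_channels_uv=256)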
        coord_2d = (temp == classes[label]).nonzero(as_tuple=True)

        adr = os.path.join(train_eval_dir, label, "predicted_pose",
                           "info_" + str(idx) + ".txt")

        coord_2d = torch.cat(
            (coord_2d[0].view(coord_2d[0].shape[0], 1), coord_2d[1].view(
                coord_2d[1].shape[0], 1)), 1)
        uvalues = upred[coord_2d[:, 0], coord_2d[:, 1]]
        vvalues = vpred[coord_2d[:, 0], coord_2d[:, 1]]
        dct_keys = torch.cat((uvalues.view(-1, 1), vvalues.view(-1, 1)), 1)
        dct_keys = tuple(dct_keys.numpy())
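        # precomputed lookup table from (u, v) texture coordinates to 3D
        # points on the object model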
        dct = load_obj(os.path.join(root_dir, label + "/UV-XYZ_mapping"))
        mapping_2d = []
        mapping_3d = []
        for count, (u, v) in enumerate(dct_keys):
            if (u, v) in dct:
                mapping_2d.append(np.array(coord_2d[count]))
                mapping_3d.append(dct[(u, v)])
        # Get the 6D pose from the rotation and translation matrices.
        # solvePnPRansac with SOLVEPNP_P3P needs at least 4 unique 2D-3D
        # correspondences (mapping_2d and mapping_3d always have equal length)
        if len(mapping_2d) >= 4 and len(mapping_3d) >= 4:
            _, rvecs, tvecs, inliers = cv2.solvePnPRansac(
                np.array(mapping_3d, dtype=np.float32),
                np.array(mapping_2d, dtype=np.float32),
                intrinsic_matrix,
                distCoeffs=None,
                iterationsCount=150,
                reprojectionError=1.0,
                flags=cv2.SOLVEPNP_P3P)
            rot, _ = cv2.Rodrigues(rvecs, jacobian=None)
            # guard against degenerate solutions: replace NaNs and clip the
            # translation components to [-100, 100]
            rot[np.isnan(rot)] = 1
            tvecs[np.isnan(tvecs)] = 1
            tvecs = np.clip(tvecs, -100., 100.)
            rot_tra = np.append(rot, tvecs, axis=1)
            # save the predicted pose
            np.savetxt(adr, rot_tra)
        else:  # PnP not possible: save a placeholder pose (ones rotation, zero translation)
            outliers += 1
            rot_tra = np.ones((3, 4))
            rot_tra[:, 3] = 0
            np.savetxt(adr, rot_tra)
    print("Number of instances where PnP couldn't be used: ", outliers)
Example #3
def train_correspondence_block(root_dir, classes, epochs=10):

    train_data = LineMODDataset(root_dir,
                                classes=classes,
                                transform=transforms.Compose([
                                    transforms.ToTensor(),
                                    # all-zero jitter is a no-op; raise these
                                    # values to actually augment colors
                                    transforms.ColorJitter(brightness=0,
                                                           contrast=0,
                                                           saturation=0,
                                                           hue=0)
                                ]))

    batch_size = 4
    num_workers = 0
    valid_size = 0.2
    # obtain training indices that will be used for validation
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # prepare data loaders (combine dataset and sampler)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=num_workers)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=num_workers)

    # architecture for correspondence block - 13 objects + background = 14 channels for ID masks
    correspondence_block = UNET.UNet(n_channels=3,
                                     out_channels_id=14,
                                     out_channels_uv=256,
                                     bilinear=True)
    correspondence_block.cuda()

    # custom loss function and optimizer
    criterion_id = nn.CrossEntropyLoss()
    criterion_u = nn.CrossEntropyLoss()
    criterion_v = nn.CrossEntropyLoss()

    # specify optimizer
    optimizer = optim.Adam(correspondence_block.parameters(),
                           lr=3e-4,
                           weight_decay=3e-5)

    # training loop

    # number of epochs to train the model
    n_epochs = epochs

    valid_loss_min = np.inf  # track change in validation loss

    for epoch in range(1, n_epochs + 1):
        # keep track of training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        print("------ Epoch ", epoch, " ---------")

        ###################
        # train the model #
        ###################
        correspondence_block.train()
        for _, image, idmask, umask, vmask in train_loader:
            # move tensors to GPU if CUDA is available
            image, idmask, umask, vmask = image.cuda(), idmask.cuda(
            ), umask.cuda(), vmask.cuda()
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            idmask_pred, umask_pred, vmask_pred = correspondence_block(image)
            # calculate the batch loss
            loss_id = criterion_id(idmask_pred, idmask)
            loss_u = criterion_u(umask_pred, umask)
            loss_v = criterion_v(vmask_pred, vmask)
            loss = loss_id + loss_u + loss_v
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # update training loss
            train_loss += loss.item()
        ######################
        # validate the model #
        ######################
        correspondence_block.eval()
        with torch.no_grad():  # disable autograd during validation to save GPU memory
            for _, image, idmask, umask, vmask in valid_loader:
                # move tensors to GPU if CUDA is available
                image, idmask, umask, vmask = image.cuda(), idmask.cuda(
                ), umask.cuda(), vmask.cuda()
                # forward pass: compute predicted outputs by passing inputs to the model
                idmask_pred, umask_pred, vmask_pred = correspondence_block(
                    image)
                # calculate the batch loss
                loss_id = criterion_id(idmask_pred, idmask)
                loss_u = criterion_u(umask_pred, umask)
                loss_v = criterion_v(vmask_pred, vmask)
                loss = loss_id + loss_u + loss_v
                # update average validation loss
                valid_loss += loss.item()

        # calculate average losses
        train_loss = train_loss / len(train_loader.sampler)
        valid_loss = valid_loss / len(valid_loader.sampler)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.
              format(epoch, train_loss, valid_loss))

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print(
                'Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'
                .format(valid_loss_min, valid_loss))
            torch.save(correspondence_block.state_dict(),
                       'correspondence_block.pt')
            valid_loss_min = valid_loss
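Note that this version saves correspondence_block.pt to the current working directory (Example #4 below writes into train_eval_dir instead). A minimal reload sketch for inference, assuming the same UNet definition:

correspondence_block = UNET.UNet(n_channels=3, out_channels_id=14,
                                 out_channels_uv=256, bilinear=True)
correspondence_block.load_state_dict(
    torch.load('correspondence_block.pt', map_location=torch.device('cpu')))
correspondence_block.cuda()
correspondence_block.eval()
with torch.no_grad():
    # dummy 240x320 RGB input matching the training resolution
    idmask_pred, umask_pred, vmask_pred = correspondence_block(
        torch.rand(1, 3, 240, 320).cuda())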
Example #4
def train_correspondence_block(root_dir, train_eval_dir, classes, epochs=10,
                               batch_size=4, accum_grad_batch_size=1,
                               out_path_and_name=None, corr_transfer=None):

    train_data = LineMODDataset(root_dir,
                                train_eval_dir,
                                classes=classes,
                                transform=transforms.Compose([
                                    transforms.ToTensor(),
                                    transforms.ColorJitter(brightness=0,
                                                           contrast=0,
                                                           saturation=0,
                                                           hue=0)
                                ]))

    num_workers = 0
    valid_size = 0.2
    # obtain training indices that will be used for validation
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # prepare data loaders (combine dataset and sampler)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=num_workers)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=num_workers)

    # architecture for correspondence block - 13 objects + background = 14 channels for ID masks
    correspondence_block = UNET.UNet(n_channels=3,
                                     out_channels_id=14,
                                     out_channels_uv=256,
                                     bilinear=True)

    if corr_transfer:
        print("Initializing correspondence block from: %s" % corr_transfer)
        correspondence_block.load_state_dict(
            torch.load(corr_transfer, map_location=torch.device('cpu')))

    correspondence_block.cuda()

    # Optional debug: print a model summary (requires torchsummary), then exit
    # from torchsummary import summary
    # summary(correspondence_block, input_size=(3, 240, 320))
    # sys.exit(1)

    # custom loss function and optimizer
    weight_classes = False
    if weight_classes:
        # Using weighted version for class mask as mentioned in the paper
        # However, not sure what the weighting is, so taking a guess
        # Note we don't need to normalize when using the default 'reduction' arg
        class_weights = np.ones(len(classes) + 1)  # +1 for background
        class_weights[0] = 0.1
        criterion_id = nn.CrossEntropyLoss(
            torch.tensor(class_weights, dtype=torch.float32).cuda())
    else:
        criterion_id = nn.CrossEntropyLoss()

    criterion_u = nn.CrossEntropyLoss()
    criterion_v = nn.CrossEntropyLoss()

    # specify optimizer
    optimizer = optim.Adam(correspondence_block.parameters(),
                           lr=3e-4,
                           weight_decay=3e-5)
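    # drop the learning rate when the validation loss plateaus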
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     'min',
                                                     patience=2,
                                                     verbose=True)

    # number of epochs to train the model
    n_epochs = epochs
    # track change in validation loss
    valid_loss_min = np.inf

    if accum_grad_batch_size != 1:
        print("Gradient accumulator batch size: %i" % accum_grad_batch_size)

    # training loop
    for epoch in range(1, n_epochs + 1):
        # keep track of training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        train_idmask_loss = train_umask_loss = train_vmask_loss = 0.0
        valid_idmask_loss = valid_umask_loss = valid_vmask_loss = 0.0

        print("------ Epoch ", epoch, " ---------")
        print("Training...")

        ###################
        # train the model #
        ###################
        batch_cnt = 0
        correspondence_block.train()
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        for img_adr, image, idmask, umask, vmask in train_loader:
            assert image.shape[1] == correspondence_block.n_channels, \
                    f'Network has been defined with {correspondence_block.n_channels} input channels, ' \
                    f'but loaded images have {image.shape[1]} channels. Please check that ' \
                    'the images are loaded correctly.'
            if batch_cnt % 100 == 0:
                print("Batch %i/%i finished!" %
                      (batch_cnt, len(train_loader)))
            # move tensors to GPU if CUDA is available
            image, idmask, umask, vmask = image.cuda(), idmask.cuda(
            ), umask.cuda(), vmask.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            idmask_pred, umask_pred, vmask_pred = correspondence_block(image)
            # Calculate the batch loss
            # Need to divide by accum_grad_batch_size to account for accumulated gradients
            loss_id = criterion_id(idmask_pred, idmask) / accum_grad_batch_size
            loss_u = criterion_u(umask_pred, umask) / accum_grad_batch_size
            loss_v = criterion_v(vmask_pred, vmask) / accum_grad_batch_size
            total_loss = loss_id + loss_u + loss_v
            # backward pass: compute gradient of the loss with respect to model parameters
            total_loss.backward()
            # update training loss
            # Note that .item() detaches from the graph, so the memory is
            # released. DON'T accumulate the loss tensors themselves
            train_idmask_loss += loss_id.item()
            train_umask_loss += loss_u.item()
            train_vmask_loss += loss_v.item()
            train_loss += total_loss.item()
            # Only update once every accum_grad_batch_size batches. If the
            # batch count isn't divisible, the final partial accumulation is
            # dropped (gradients are cleared again before validation)
            if (batch_cnt + 1) % accum_grad_batch_size == 0:
                # perform a single optimization step (parameter update)
                optimizer.step()
                # Reset gradients, for next accumulated batch
                optimizer.zero_grad()
            batch_cnt += 1

        ######################
        # validate the model #
        ######################
        print("Validating...")
        correspondence_block.eval()
        optimizer.zero_grad()
        batch_cnt = 0
        with torch.no_grad():  # This is critical to limit GPU memory use
            for img_adr, image, idmask, umask, vmask in valid_loader:
                if batch_cnt % 100 == 0:
                    print("Batch %i/%i finished!" %
                          (batch_cnt, len(valid_loader)))
                # move tensors to GPU if CUDA is available
                image, idmask, umask, vmask = image.cuda(), idmask.cuda(
                ), umask.cuda(), vmask.cuda()
                # forward pass: compute predicted outputs by passing inputs to the model
                idmask_pred, umask_pred, vmask_pred = correspondence_block(
                    image)
                # calculate the batch loss
                loss_id = criterion_id(idmask_pred, idmask)
                loss_u = criterion_u(umask_pred, umask)
                loss_v = criterion_v(vmask_pred, vmask)
                total_loss = loss_id + loss_u + loss_v
                # update average validation loss
                valid_idmask_loss += loss_id.item()
                valid_umask_loss += loss_u.item()
                valid_vmask_loss += loss_v.item()
                valid_loss += total_loss.item()
                batch_cnt += 1

        # Calculate average losses
        train_loss = train_loss / len(train_loader.sampler)
        train_idmask_loss = train_idmask_loss / len(train_loader.sampler)
        train_umask_loss = train_umask_loss / len(train_loader.sampler)
        train_vmask_loss = train_vmask_loss / len(train_loader.sampler)

        valid_loss = valid_loss / len(valid_loader.sampler)
        valid_idmask_loss = valid_idmask_loss / len(valid_loader.sampler)
        valid_umask_loss = valid_umask_loss / len(valid_loader.sampler)
        valid_vmask_loss = valid_vmask_loss / len(valid_loader.sampler)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.
              format(epoch, train_loss, valid_loss))
        print('Train IDMask loss: %.6f \tUMask loss: %.6f \tVMask loss: %.6f' %
              (train_idmask_loss, train_umask_loss, train_vmask_loss))
        print('Valid IDMask loss: %.6f \tUMask loss: %.6f \tVMask loss: %.6f' %
              (valid_idmask_loss, valid_umask_loss, valid_vmask_loss))

        scheduler.step(valid_loss)

        # TODO - monitor for train/val divergence and stop

        # save model if validation loss has decreased
        # Skip epoch 1 (unless training for a single epoch): when transfer
        # learning, valid_loss_min starts at Inf, so the first epoch would
        # always register as an improvement
        if valid_loss <= valid_loss_min and (epoch > 1 or epochs == 1):
            print(
                'Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'
                .format(valid_loss_min, valid_loss))

            if not out_path_and_name:
                correspondence_block_filename = os.path.join(
                    train_eval_dir, 'correspondence_block.pt')
            else:
                correspondence_block_filename = out_path_and_name

            torch.save(correspondence_block.state_dict(),
                       correspondence_block_filename)
            valid_loss_min = valid_loss
        if epoch == 1:
            valid_loss_min = valid_loss
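A hedged usage sketch for this variant. Because each batch loss is divided by accum_grad_batch_size before backward(), the accumulated gradient approximates a single batch of batch_size * accum_grad_batch_size samples, so the call below behaves roughly like batch-16 training while holding only four images on the GPU at a time (paths are illustrative):

train_correspondence_block(root_dir='LineMOD_Dataset/',
                           train_eval_dir='train_eval_output/',
                           classes=classes,
                           epochs=20,
                           batch_size=4,
                           accum_grad_batch_size=4,  # effective batch size 16
                           corr_transfer=None)       # or a .pt path to warm-start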