# Example 1
    def __getitem__(self, index):
        """Load one sample: RGB image, raw depth, and surface normal.

        Returns:
            image (Tensor): 3 x H x W RGB, centered to roughly [-0.5, 0.5].
            raw_depth (Tensor): 1 x H x W depth, raw png value / 10000.
            normal (Tensor): 3 x H x W normal, mapped from [0, 1] to [-1, 1].
        """
        name = self.files[self.split][index]
        depth_path = pjoin(self.root, str(name))

        # --- raw depth: scaled so 10000 raw units == 1.0 ---
        depth_np = png_reader_32bit(depth_path, self.img_size).astype(float)
        depth_np = depth_np / 10000

        # Validity mask (non-zero depth).  Unused by the return below; kept to
        # mirror the masked variant of this loader (see commented return).
        mask_np = (depth_np > 0.0001).astype(float)
        raw_depth = torch.from_numpy(depth_np[np.newaxis, :, :]).float()
        raw_depth_mask = torch.from_numpy(mask_np).float()

        # --- RGB image: center around zero, then HWC -> CHW ---
        rgb_path = depth_path.replace('depth', 'colors')
        img_np = png_reader_uint8(rgb_path, self.img_size).astype(float)
        img_np = ((img_np - 128) / 255).transpose(2, 0, 1)
        image = torch.from_numpy(img_np).float()

        # --- normal map: [0, 255] -> [0, 1] -> [-1, 1], HWC -> CHW ---
        normal_path = depth_path.replace('depth', 'normal')
        n_np = png_reader_uint8(normal_path, self.img_size).astype(float) / 255
        n_np = 2 * n_np.transpose(2, 0, 1) - 1
        normal = torch.from_numpy(n_np).float()

        # image          : RGB      3,240,320
        # raw_depth      : depth    1, 240,320
        # raw_depth_mask : 0 or 1,  240,320
        # normal         : /255  *2-1, 3,240,320

        # with masks
        # return image, normal, normal_mask, raw_depth_mask, raw_depth

        # without masks
        return image, raw_depth, normal
    def __getitem__(self, index):
        """Load one sample: RGB image, raw z-buffer depth, and normal map.

        Returns:
            img       (Tensor): 3 x H x W RGB, centered to roughly [-0.5, 0.5].
            raw_depth (Tensor): 1 x H x W depth, raw png value / 10000.
            normal    (Tensor): 3 x H x W normal, mapped from [0, 1] to [-1, 1].
        """
        im_name_base = self.files[index]

        # raw_depth
        raw_depth_path = pjoin(self.root, str(im_name_base))
        raw_depth = png_reader_32bit(raw_depth_path, self.img_size)
        # Cast to float BEFORE replacing the 65535 invalid-depth sentinel:
        # assigning 0.001 into an unsigned-integer array silently truncates
        # it to 0, so sentinel pixels ended up as 0 instead of 0.001.
        raw_depth = raw_depth.astype(float)
        raw_depth[raw_depth == 65535] = 0.001
        raw_depth = raw_depth / 10000
        raw_depth = raw_depth[np.newaxis, :, :]
        raw_depth = torch.from_numpy(raw_depth).float()

        # image: center around zero, HWC -> CHW
        rgb_path = raw_depth_path.replace('depth_zbuffer', 'rgb')
        img = png_reader_uint8(rgb_path, self.img_size)
        img = img.astype(float)
        img = (img - 128) / 255
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).float()

        # normal: [0, 255] -> [0, 1] -> [-1, 1], HWC -> CHW
        normal_path = raw_depth_path.replace('depth_zbuffer', 'normal')
        normal = png_reader_uint8(normal_path, self.img_size)
        normal = normal.astype(float)
        normal = normal / 255
        normal = normal.transpose(2, 0, 1)
        normal = 2 * normal - 1
        normal = torch.from_numpy(normal).float()

        return img, raw_depth, normal
    def __getitem__(self, index):
        """Load one sample with rendered (mesh) depth and normal labels.

        Returns (when ``self.mode == 'seg'``):
            image, normal, normal_mask, raw_depth_mask, raw_depth, seg_img
        Returns (otherwise):
            image, normal, normal_mask, raw_depth_mask, raw_depth, render_depth
        """
        im_name_base = self.files[self.split][index]

        # raw_depth: scaled so 10000 raw units == 1.0
        raw_depth_path = pjoin(self.root, str(im_name_base))
        raw_depth = png_reader_32bit(raw_depth_path, self.img_size)
        raw_depth = raw_depth.astype(float)
        raw_depth = raw_depth / 10000

        # raw_depth_mask: 1 where a measured (non-zero) depth exists
        raw_depth_mask = (raw_depth > 0.0001).astype(float)
        raw_depth = raw_depth[np.newaxis, :, :]
        raw_depth = torch.from_numpy(raw_depth).float()
        raw_depth_mask = torch.from_numpy(raw_depth_mask).float()

        # segmentation label, only needed (and defined) in 'seg' mode.
        # The original left this load commented out while still returning
        # seg_img below, which raised NameError whenever mode == 'seg'.
        if self.mode == 'seg':
            seg_path = raw_depth_path.replace('/depth/', '/label/')
            seg_img = png_reader_32bit(seg_path, self.img_size)
            seg_img = torch.from_numpy(seg_img)

        # image: center around zero, HWC -> CHW
        rgb_path = raw_depth_path.replace('depth', 'colors')
        image = png_reader_uint8(rgb_path, self.img_size)
        image = image.astype(float)
        image = (image - 128) / 255
        image = image.transpose(2, 0, 1)
        image = torch.from_numpy(image).float()

        # Derive the render-file name from the frame name.  (Renamed from
        # `index`, which shadowed the method parameter.)
        # NOTE(review): the slice bounds assume a fixed scene/frame naming
        # layout — confirm against the file-list format.
        frame_name = im_name_base[19:]
        frame_name = frame_name.zfill(13)
        scene_name = im_name_base[:12]

        # render_depth: mesh-rendered depth, 40000 raw units == 1.0
        render_depth_name = frame_name.replace('.png', '_mesh_depth.png')
        render_depth_path = pjoin(self.root, scene_name, 'render_depth',
                                  render_depth_name)
        render_depth = png_reader_32bit(render_depth_path, self.img_size)
        render_depth = render_depth.astype(float)
        render_depth = render_depth / 40000
        render_depth = render_depth[np.newaxis, :, :]
        render_depth = torch.from_numpy(render_depth).float()

        # normal: three 16-bit channel files stored next to render_depth
        normal_x_path = render_depth_path.replace('_depth.png', '_nx.png')
        normal_y_path = render_depth_path.replace('_depth.png', '_ny.png')
        normal_z_path = render_depth_path.replace('_depth.png', '_nz.png')
        normal_x_path = normal_x_path.replace('/render_depth/',
                                              '/render_normal/')
        normal_y_path = normal_y_path.replace('/render_depth/',
                                              '/render_normal/')
        normal_z_path = normal_z_path.replace('/render_depth/',
                                              '/render_normal/')

        normal_x = png_reader_32bit(normal_x_path, self.img_size)
        normal_y = png_reader_32bit(normal_y_path, self.img_size)
        normal_z = png_reader_32bit(normal_z_path, self.img_size)

        normal_x = normal_x.astype(float) / 65535
        normal_y = normal_y.astype(float) / 65535
        normal_z = normal_z.astype(float) / 65535

        # normal mask: 1 where the rendered normal has (near) non-zero length
        normal_mask = np.power(normal_x, 2) + np.power(normal_y, 2) + np.power(
            normal_z, 2)
        normal_mask = (normal_mask > 0.001).astype(float)

        # Fill holes with the neutral value 0.5 (-> 0 after the *2-1 below)
        normal_x[normal_mask == 0] = 0.5
        normal_y[normal_mask == 0] = 0.5
        normal_z[normal_mask == 0] = 0.5

        # Axis remap (x, 1-z, y) matches the renderer's coordinate frame
        normal = np.concatenate(
            (normal_x[:, :, np.newaxis], 1 - normal_z[:, :, np.newaxis],
             normal_y[:, :, np.newaxis]),
            axis=2)
        normal = 2 * normal - 1
        normal = torch.from_numpy(normal).float()

        # image          : RGB -0.5-0.5, 3*h*w
        # raw_depth      : /10000, 1*h*w
        # raw_depth_mask : 0 or 1, h*w
        # render_depth   : /40000, 1*h*w
        # normal         : /65535, h*w*3
        # normal_mask    : 0 or 1, h*w
        # seg_img        : uint16, h*w same as normal_mask
        if self.mode == 'seg':
            # For segmentation mask
            return image, normal, normal_mask, raw_depth_mask, raw_depth, seg_img
        else:
            # Ordinary RGBD2normal
            return image, normal, normal_mask, raw_depth_mask, raw_depth, render_depth
# Example 4
    def __getitem__(self, index):
        """Load one Matterport sample: RGB, rendered normal + masks, depths.

        Returns:
            im        (Tensor): 3 x H x W RGB (centered to ~[-0.5, 0.5] when
                                ``self.img_norm`` is set).
            lb        (Tensor): H x W x 3 ground-truth normal in [-1, 1].
            mask      (Tensor): H x W, 1 where the rendered normal is valid.
            valid     (Tensor): H x W, 1 where the raw depth is non-zero.
            rawdepth  (Tensor): 1 x H x W raw depth / 40000.
            meshdepth (Tensor): 1 x H x W mesh-rendered depth / 40000.
        """
        im_name_base = self.files[self.split][index]
        im_path = pjoin(self.root, im_name_base)

        # Derive the raw-depth path from the color-image name.
        im_name = im_name_base.replace('_i', '_d')
        im_name = im_name.replace('undistorted_color_dmages',
                                  'undistorted_depth_images')
        im_name = im_name.replace('.jpg', '.png')
        depth_path = pjoin(self.root, im_name)

        # Rendered normal channels (x/y/z stored as separate 16-bit pngs).
        im_name = im_name_base.replace('_i', '_d')
        im_name = im_name.replace('undistorted_color_dmages', 'render_normal')
        lb_path_nx = pjoin(self.root, im_name.replace('.jpg', '_mesh_nx.png'))
        lb_path_ny = pjoin(self.root, im_name.replace('.jpg', '_mesh_ny.png'))
        lb_path_nz = pjoin(self.root, im_name.replace('.jpg', '_mesh_nz.png'))

        # Mesh-rendered depth path.
        im_name = im_name_base.replace('_i', '_d')
        im_name = im_name.replace('undistorted_color_dmages', 'render_depth')
        meshdepth_path = pjoin(self.root,
                               im_name.replace('.jpg', '_mesh_depth.png'))

        im = png_reader_uint8(im_path, self.img_size)  #uint8
        rawdepth = png_reader_32bit(depth_path, self.img_size)  #32bit uint
        lbx = png_reader_32bit(lb_path_nx, self.img_size)
        lby = png_reader_32bit(lb_path_ny, self.img_size)
        lbz = png_reader_32bit(lb_path_nz, self.img_size)
        meshdepth = png_reader_32bit(meshdepth_path, self.img_size)

        im = im.astype(float)
        rawdepth = rawdepth.astype(float)
        lbx = lbx.astype(float)
        lby = lby.astype(float)
        lbz = lbz.astype(float)
        meshdepth = meshdepth.astype(float)

        if self.img_norm:
            # Scale the image to roughly -0.5 ~ 0.5
            im = (im - 128) / 255

        # Labels, masks and depths are always normalized: the return contract
        # below requires lb/mask/valid, but the original defined them only
        # under img_norm, raising NameError when the flag was off.
        # Scale labels from 16-bit to 0 ~ 1
        lbx = lbx / 65535
        lby = lby / 65535
        lbz = lbz / 65535
        # Mask out pixels whose rendered normal has (near) zero length
        mask = np.power(lbx, 2) + np.power(lby, 2) + np.power(lbz, 2)
        mask = (mask > 0.001).astype(float)
        # Fill holes with the neutral value 0.5 (-> 0 after the *2-1 below)
        lbx[mask == 0] = 0.5
        lby[mask == 0] = 0.5
        lbz[mask == 0] = 0.5
        # Axis remap (x, 1-z, y) matches the renderer's coordinate frame
        lb = np.concatenate(
            (lbx[:, :, np.newaxis], 1 - lbz[:, :, np.newaxis],
             lby[:, :, np.newaxis]),
            axis=2)
        lb = 2 * lb - 1
        # Depths: 40000 raw units == 1.0
        rawdepth = rawdepth / 40000
        meshdepth = meshdepth / 40000
        # Valid where the raw sensor actually measured something
        valid = (rawdepth > 0.0001).astype(float)

        # NHWC -> NCHW
        im = im.transpose(2, 0, 1)
        im = torch.from_numpy(im).float()

        lb = torch.from_numpy(lb).float()
        mask = torch.from_numpy(mask).float()
        valid = torch.from_numpy(valid).float()

        rawdepth = rawdepth[np.newaxis, :, :]
        rawdepth = torch.from_numpy(rawdepth).float()

        meshdepth = meshdepth[np.newaxis, :, :]
        meshdepth = torch.from_numpy(meshdepth).float()

        # input: im, 3*h*w
        # gt: lb, h*w*3
        # mask: gt!=0,h*w
        # valid: rawdepth!=0, h*w
        # rawdepth: depth with hole, 1*h*w
        # meshdepth: depth with hole, 1*h*w
        return im, lb, mask, valid, rawdepth, meshdepth
# Example 5
def test(args):
    """Evaluate the RGB-D -> surface-normal fusion model.

    With ``args.imgset`` set, iterates a dataset split, saves prediction /
    ground-truth / input images, and accumulates angular-error statistics
    into a CSV next to the checkpoint.  Otherwise runs on loose ``.jpg``
    images (with matching ``.png`` depth maps) from ``args.img_path`` and
    writes predicted normal maps to ``args.out_path``.
    """
    # Setup the fusion model (RGB+Depth)
    model_name_F = args.arch_F
    model_F = get_model(model_name_F, True)  # concat and output
    model_F = torch.nn.DataParallel(model_F,
                                    device_ids=range(
                                        torch.cuda.device_count()))
    # Setup the map model (only constructed for arch_map == 'map_conv')
    if args.arch_map == 'map_conv':
        model_name_map = args.arch_map
        model_map = get_model(model_name_map, True)  # concat and output
        model_map = torch.nn.DataParallel(model_map,
                                          device_ids=range(
                                              torch.cuda.device_count()))

    if args.model_full_name != '':
        # Use the full name of model to load
        print("Load training model: " + args.model_full_name)
        checkpoint = torch.load(
            pjoin(args.model_savepath, args.model_full_name))
        model_F.load_state_dict(checkpoint['model_F_state'])
        # Only restore the map model when it was constructed above; loading
        # it unconditionally raised NameError for other arch_map values.
        if args.arch_map == 'map_conv':
            model_map.load_state_dict(checkpoint["model_map_state"])

    # Setup image
    if args.imgset:
        print("Test on dataset: {}".format(args.dataset))
        data_loader = get_loader(args.dataset)
        data_path = get_data_path(args.dataset)
        v_loader = data_loader(data_path,
                               split=args.test_split,
                               img_size=(args.img_rows, args.img_cols),
                               img_norm=args.img_norm)
        evalloader = data.DataLoader(v_loader, batch_size=1)
        print("Finish Loader Setup")

        model_F.cuda()
        model_F.eval()
        if args.arch_map == 'map_conv':
            model_map.cuda()
            model_map.eval()

        sum_mean, sum_median, sum_small, sum_mid, sum_large, sum_num = [], [], [], [], [], []
        evalcount = 0
        with torch.no_grad():
            for i_val, (images_val, labels_val, masks_val, valids_val,
                        depthes_val,
                        meshdepthes_val) in tqdm(enumerate(evalloader)):

                images_val = Variable(images_val.contiguous().cuda())
                labels_val = Variable(labels_val.contiguous().cuda())
                masks_val = Variable(masks_val.contiguous().cuda())
                valids_val = Variable(valids_val.contiguous().cuda())
                depthes_val = Variable(depthes_val.contiguous().cuda())

                if args.arch_map == 'map_conv':
                    # Refine the validity mask with the map network first.
                    outputs_valid = model_map(
                        torch.cat(
                            (depthes_val, valids_val[:, np.newaxis, :, :]),
                            dim=1))
                    outputs, outputs1, outputs2, outputs3, output_d = model_F(
                        images_val, depthes_val, outputs_valid.squeeze(1))
                else:
                    outputs, outputs1, outputs2, outputs3, output_d = model_F(
                        images_val, depthes_val, valids_val)

                outputs_n, pixelnum, mean_i, median_i, small_i, mid_i, large_i = eval_normal_pixel(
                    outputs, labels_val, masks_val)
                outputs_norm = np.squeeze(outputs_n.data.cpu().numpy(), axis=0)
                labels_val_norm = np.squeeze(labels_val.data.cpu().numpy(),
                                             axis=0)
                images_val = np.squeeze(images_val.data.cpu().numpy(), axis=0)
                images_val = images_val + 0.5
                images_val = images_val.transpose(1, 2, 0)
                depthes_val = np.squeeze(depthes_val.data.cpu().numpy(),
                                         axis=0)
                depthes_val = np.transpose(depthes_val, [1, 2, 0])
                depthes_val = np.repeat(depthes_val, 3, axis=2)

                outputs_norm = change_channel(outputs_norm)
                labels_val_norm = (labels_val_norm + 1) / 2
                labels_val_norm = change_channel(labels_val_norm)

                # NOTE(review): scipy.misc image I/O was removed in SciPy 1.2;
                # consider imageio.imwrite if the environment is upgraded.
                misc.imsave(
                    pjoin(args.testset_out_path,
                          "{}_MS_hyb.png".format(i_val + 1)), outputs_norm)
                misc.imsave(
                    pjoin(args.testset_out_path,
                          "{}_gt.png".format(i_val + 1)), labels_val_norm)
                misc.imsave(
                    pjoin(args.testset_out_path,
                          "{}_in.jpg".format(i_val + 1)), images_val)
                misc.imsave(
                    pjoin(args.testset_out_path,
                          "{}_depth.png".format(i_val + 1)), depthes_val)

                # Accumulate metrics only for finite values.  The original
                # test `(np.isnan(m) | np.isinf(m)) == False` bound the `==`
                # to the isinf term only, so NaN rows slipped through.
                if np.isfinite(mean_i):
                    sum_mean.append(mean_i)
                    sum_median.append(median_i)
                    sum_small.append(small_i)
                    sum_mid.append(mid_i)
                    sum_large.append(large_i)
                    sum_num.append(pixelnum)
                    evalcount += 1
                    if (i_val + 1) % 10 == 0:
                        print(
                            "Iteration %d Evaluation Loss: mean %.4f, median %.4f, 11.25 %.4f, 22.5 %.4f, 30 %.4f"
                            % (i_val + 1, mean_i, median_i, small_i, mid_i,
                               large_i))

            # Summarize the result
            eval_print(sum_mean,
                       sum_median,
                       sum_small,
                       sum_mid,
                       sum_large,
                       sum_num,
                       item='Pixel-Level')

            avg_mean = sum(sum_mean) / evalcount
            sum_mean.append(avg_mean)
            avg_median = sum(sum_median) / evalcount
            sum_median.append(avg_median)
            avg_small = sum(sum_small) / evalcount
            sum_small.append(avg_small)
            avg_mid = sum(sum_mid) / evalcount
            sum_mid.append(avg_mid)
            avg_large = sum(sum_large) / evalcount
            sum_large.append(avg_large)
            print(
                "evalnum is %d, Evaluation Image-Level Mean Loss: mean %.4f, median %.4f, 11.25 %.4f, 22.5 %.4f, 30 %.4f"
                % (evalcount, avg_mean, avg_median, avg_small, avg_mid,
                   avg_large))

            sum_matrix = np.transpose(
                [sum_mean, sum_median, sum_small, sum_mid, sum_large])
            # Only write the CSV when a checkpoint name exists; `sum_file`
            # was otherwise undefined at the savetxt call.
            if args.model_full_name != '':
                sum_file = args.model_full_name[:-4] + '.csv'
                np.savetxt(pjoin(args.model_savepath, sum_file),
                           sum_matrix,
                           fmt='%.6f',
                           delimiter=',')
                print("Saving to %s" % (sum_file))
            # end of dataset test
    else:
        if not os.path.isdir(args.out_path):
            os.mkdir(args.out_path)
        print("Read Input Image from : {}".format(args.img_path))
        for i in os.listdir(args.img_path):
            if not i.endswith('.jpg'):
                continue

            # Python 3 print call (the original used the py2 `print i`
            # statement, a SyntaxError under Python 3).
            print(i)
            input_f = args.img_path + i
            depth_f = args.depth_path + i[:-4] + '.png'
            output_f = args.out_path + i[:-4] + '_rgbd.png'
            img = misc.imread(input_f)

            orig_size = img.shape[:-1]
            if args.img_rot:
                img = np.transpose(img, (1, 0, 2))
                img = np.flipud(img)
                img = misc.imresize(img, (
                    args.img_cols,
                    args.img_rows))  # Need resize the image to model inputsize
            else:
                img = misc.imresize(img, (
                    args.img_rows,
                    args.img_cols))  # Need resize the image to model inputsize

            # `np.float` was removed in NumPy 1.24; the builtin is equivalent.
            img = img.astype(float)
            if args.img_norm:
                img = (img - 128) / 255
            # NHWC -> NCHW
            img = img.transpose(2, 0, 1)
            img = np.expand_dims(img, 0)
            img = torch.from_numpy(img).float()

            if args.img_rot:
                depth = png_reader_32bit(depth_f,
                                         (args.img_rows, args.img_cols))
                depth = np.transpose(depth, (1, 0))
                depth = np.flipud(depth)
            else:
                depth = png_reader_32bit(depth_f,
                                         (args.img_rows, args.img_cols))

            depth = depth.astype(float)
            # Please change to the scale so that scaled_depth=1 corresponding
            # to real 10m depth: matterport depth/40000, scannet depth/10000.
            depth = depth / (args.d_scale)
            if depth.ndim == 3:  # to dim 2
                depth = depth[:, :, 0]

            # Valid where the depth map has a non-zero measurement.
            valid = (depth > 0.0001).astype(float)
            depth = depth[np.newaxis, :, :]
            depth = np.expand_dims(depth, 0)
            valid = np.expand_dims(valid, 0)
            depth = torch.from_numpy(depth).float()
            valid = torch.from_numpy(valid).float()

            if torch.cuda.is_available():
                model_F.cuda()
                model_F.eval()
                if args.arch_map == 'map_conv':
                    model_map.cuda()
                    model_map.eval()
                images = Variable(img.contiguous().cuda())
                depth = Variable(depth.contiguous().cuda())
                valid = Variable(valid.contiguous().cuda())
            else:
                images = Variable(img)
                depth = Variable(depth)
                valid = Variable(valid)

            with torch.no_grad():
                if args.arch_map == 'map_conv':
                    outputs_valid = model_map(
                        torch.cat((depth, valid[:, np.newaxis, :, :]), dim=1))
                    outputs, outputs1, outputs2, outputs3, output_d = model_F(
                        images, depth, outputs_valid.squeeze(1))
                else:
                    # Pass the raw validity mask (the original referenced the
                    # undefined `outputs_valid` here, raising NameError).
                    outputs, outputs1, outputs2, outputs3, output_d = model_F(
                        images, depth, valid)

            outputs_norm = norm_imsave(outputs)
            outputs_norm = np.squeeze(outputs_norm.data.cpu().numpy(), axis=0)
            # outputs_norm = misc.imresize(outputs_norm, orig_size)
            outputs_norm = change_channel(outputs_norm)
            misc.imsave(output_f, outputs_norm)
        print("Complete")