def fit_uv_mesh(initial_mesh: dict,
                target_dataset,
                max_iterations: int = 5000,
                resolution: int = 4,
                log_interval: int = 10,
                dispaly_interval=1000,
                display_res=512,
                out_dir=None,
                mp4save_interval=None):
    glctx = dr.RasterizeGLContext()

    r_rot = util.random_rotation_translation(0.25)

    # Smooth rotation for display.
    ang = 0.0
    a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
    dist = 2

    # Modelview and modelview + projection matrices.
    proj = util.projection(x=0.4, n=1.0, f=200.0)
    r_mv = np.matmul(util.translate(0, 0, -1.5 - dist), r_rot)
    r_mvp = np.matmul(proj, r_mv).astype(np.float32)
    a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
    a_mvp = np.matmul(proj, a_mv).astype(np.float32)

    pos_idx = initial_mesh['pos_idx'].cuda()
    vtx_pos = initial_mesh['vtx_pos'].cuda()
    tex = np.ones((1024, 1024, 3), dtype=np.float32) / 2

    uv, uv_idx = init_uv()
    uv_idx = uv_idx[:pos_idx.shape[0]]
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
    uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
    vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()
    tex = torch.from_numpy(tex.astype(np.float32)).cuda()

    # Render reference and optimized frames. Always enable mipmapping for reference.
    color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex, 1024,
                   False, 0)
    Image.fromarray((color[0].detach().cpu().numpy() * 255).astype(
        np.uint8)).save('test.png')
def fit_mesh_col(
    initial_mesh: dict,
    target_dataset_dir: str,
    max_iterations: int = 10000,
    resolution: int = 256,
    log_interval: int = None,
    display_interval = None,
    display_res = 512,
    out_dir = None,
    mp4save_interval = None
    ):

    distance = 3

    target_dataset = util.ReferenceImages(target_dataset_dir, resolution, resolution)

    pos_idx = torch.from_numpy(initial_mesh['pos_idx'].astype(np.int32))
    vtx_pos = torch.from_numpy(initial_mesh['vtx_pos'].astype(np.float32))

    laplace = util.compute_laplace_matrix(vtx_pos, pos_idx).cuda()
    pos_idx = pos_idx.cuda()
    vtx_pos = vtx_pos.cuda()

    init_rot = util.rotate_z(-math.pi/2).cuda()
    vtx_pos = transform_pos(init_rot, vtx_pos)[0][:, 0:3]
    vtx_pos.requires_grad = True

    col_idx  = torch.from_numpy(initial_mesh['pos_idx'].astype(np.int32)).cuda()
    vtx_col  = torch.ones_like(vtx_pos) * 0.5
    vtx_col.requires_grad = True

    glctx = dr.RasterizeGLContext()


    M1 = torch.eye(len(target_dataset)).cuda()
    M1.requires_grad = True
    M2 = torch.eye(len(target_dataset)).cuda()
    M2.requires_grad = True

    #M3 = torch.zeros((3, vtx_pos.shape[0], len(target_dataset))).cuda()
    M3 = torch.zeros((3 * vtx_pos.shape[0], len(target_dataset))).cuda()
    M3.requires_grad = True

    lr_ramp = .1
    params = [{'params': [M1, M2, M3], 'lr': 1e-3}, {'params': vtx_col, 'lr': 1e-2}]
    # params = [{'params': vtx_col, 'lr': 1e-2}]
    #lambdas = [lambda x: max(0.01, 10**(-x*0.0005)), lambda x: lr_ramp**(float(x)/float(max_iterations))]


    optimizer    = torch.optim.Adam(params)
    #scheduler    = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambdas)

    total_steps = 0

    loss_hist, l2_hist, reg_hist = [], [], []

    for i in range(max_iterations):
        for j, (img, angle) in enumerate(target_dataset):
            img = img.cuda().permute(2,1,0)

            frame_tensor = torch.zeros(len(target_dataset))
            frame_tensor[j] = 1
            frame_tensor = frame_tensor.cuda()
            frame_tensor.requires_grad = True

            deltas = torch.matmul(M3, torch.matmul(M2, torch.matmul(M1, frame_tensor))).flatten()
            #deformed_vtxs = vtx_pos + deltas.T
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape((vtx_pos.shape[0], 3))

            # create the model-view-projection matrix
            # rotate model about z axis by angle
            rot = util.rotate_y(angle)
            #rot = torch.eye(4)
            # translate by distance
            tr = util.translate(z=-distance)
            # perspective projection
            proj = util.projection(x=0.4)

            mtx = proj.matmul(tr.matmul(rot)).cuda()
            mtx.requires_grad = True

            estimate = render(glctx, mtx, deformed_vtxs, pos_idx, col_idx, vtx_col, resolution)[0]

            # compute loss
            loss = torch.mean((estimate - img) ** 2)

            # compute regularizer
            reg = torch.mean((util.compute_curvature(deformed_vtxs, laplace) - util.compute_curvature(vtx_pos, laplace)) ** 2) + torch.mean(deltas**2)
            
            # combine
            loss = loss + 5 * reg

            loss_hist.append(loss.cpu().numpy())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #scheduler.step()

            with torch.no_grad():
                #print(f"Loss: {loss}")
                # clamp color between 0 and 1
                vtx_col.clamp_(0, 1)

            if (display_interval and (i % display_interval == 0)) or (i == max_iterations - 1):
                print(loss)
                with torch.no_grad():
                    estimate = render(glctx, mtx, deformed_vtxs, pos_idx, col_idx, vtx_col, resolution)[0].detach().cpu().numpy()
                    Image.fromarray((estimate * 255).astype(np.uint8)).save('estimate.png')
                    img = img.detach().cpu().numpy()
                    Image.fromarray((img * 255).astype(np.uint8)).save('img.png')


    with torch.no_grad():
        for i, (im, _) in enumerate(target_dataset):
            frame_tensor = torch.zeros(len(target_dataset))
            frame_tensor[j] = 1
            frame_tensor = frame_tensor.cuda()

            deltas = torch.matmul(M3, torch.matmul(M2, torch.matmul(M1, frame_tensor))).flatten()
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape((vtx_pos.shape[0], 3))
            deformed_vtxs = torch.clamp(deformed_vtxs, -1.0, 1.0)

            #write_obj(f"frame_{i}.obj", deformed_vtxs.detach().cpu().tolist(), pos_idx.detach().cpu().tolist())
            util.write_obj(f"frame_{i}.obj", deformed_vtxs.detach().cpu().tolist(), pos_idx.detach().cpu().tolist(), vtx_col.detach().cpu().tolist())

    np.savez('vtx_col.npz', vtx_col=vtx_col.cpu().detach().numpy())
Beispiel #3
0
def fit_earth(max_iter=20000,
              log_interval=10,
              display_interval=None,
              display_res=1024,
              enable_mip=True,
              res=512,
              ref_res=4096,
              lr_base=1e-2,
              lr_ramp=0.1,
              out_dir=None,
              log_fn=None,
              texsave_interval=None,
              texsave_fn=None,
              imgsave_interval=None,
              imgsave_fn=None):

    log_file = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
    else:
        imgsave_interval, texsave_interval = None, None

    # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at
    # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/earth.npz') as f:
        pos_idx, pos, uv_idx, uv, tex = f.values()
    tex = tex.astype(np.float32) / 255.0
    max_mip_level = 9  # Texture is a 4x3 atlas of 512x512 maps.
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Some input geometry contains vertex positions in (N, 4) (with v[:,3]==1).  Drop
    # the last column in that case.
    if pos.shape[1] == 4: pos = pos[:, 0:3]

    # Create position/triangle index tensors
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
    uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
    vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()

    tex = torch.from_numpy(tex.astype(np.float32)).cuda()
    tex_opt = torch.full(tex.shape, 0.2, device='cuda', requires_grad=True)
    glctx = dr.RasterizeGLContext()

    ang = 0.0

    # Adam optimizer for texture with a learning rate ramp.
    optimizer = torch.optim.Adam([tex_opt], lr=lr_base)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda x: lr_ramp**(float(x) / float(max_iter)))

    # Render.
    ang = 0.0
    texloss_avg = []
    for it in range(max_iter + 1):
        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
        dist = np.random.uniform(0.0, 48.5)

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -1.5 - dist), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Measure texture-space RMSE loss
        with torch.no_grad():
            texmask = torch.zeros_like(tex)
            tr = tex.shape[1] // 4
            texmask[tr + 13:2 * tr - 13, 25:-25, :] += 1.0
            texmask[25:-25, tr + 13:2 * tr - 13, :] += 1.0
            # Measure only relevant portions of texture when calculating texture
            # PSNR.
            texloss = (torch.sum(texmask * (tex - tex_opt)**2) /
                       torch.sum(texmask))**0.5  # RMSE within masked area.
            texloss_avg.append(float(texloss))

        # Render reference and optimized frames. Always enable mipmapping for reference.
        color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex,
                       ref_res, True, max_mip_level)
        color_opt = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx,
                           tex_opt, res, enable_mip, max_mip_level)

        # Reduce the reference to correct size.
        while color.shape[1] > res:
            color = util.bilinear_downsample(color)

        # Compute loss and perform a training step.
        loss = torch.mean((color - color_opt)**2)  # L2 pixel loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            texloss_val = np.mean(np.asarray(texloss_avg))
            texloss_avg = []
            psnr = -10.0 * np.log10(texloss_val**
                                    2)  # PSNR based on average RMSE.
            s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save image.
        display_image = display_interval and (it % display_interval == 0)
        save_image = imgsave_interval and (it % imgsave_interval == 0)
        save_texture = texsave_interval and (it % texsave_interval) == 0

        if display_image or save_image:
            ang = ang + 0.1

            with torch.no_grad():
                result_image = render(glctx, a_mvp, vtx_pos, pos_idx, vtx_uv,
                                      uv_idx, tex_opt, res, enable_mip,
                                      max_mip_level)[0].cpu().numpy()

                if display_image:
                    util.display_image(result_image,
                                       size=display_res,
                                       title='%d / %d' % (it, max_iter))
                if save_image:
                    util.save_image(out_dir + '/' + (imgsave_fn % it),
                                    result_image)

                if save_texture:
                    texture = tex_opt.cpu().numpy()[::-1]
                    util.save_image(out_dir + '/' + (texsave_fn % it), texture)

    # Done.
    if log_file:
        log_file.close()
def fit_cube(max_iter          = 5000,
             resolution        = 4, 
             discontinuous     = False,
             repeats           = 1,
             log_interval      = 10, 
             display_interval  = None,
             display_res       = 512,
             out_dir           = '.',
             log_fn            = None,
             imgsave_interval  = None,
             imgsave_fn        = None):

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], vtxp.shape[0]))
        
    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Setup TF graph for reference.
    vtxw = np.concatenate([vtxp, np.ones([vtxp.shape[0], 1])], axis=1).astype(np.float32)
    pos_clip = tf.matmul(vtxw, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, _ = dr.rasterize(pos_clip, pos_idx, resolution=[resolution, resolution], output_db=False)
    color, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out, col_idx)
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)

    # Optimized variables.
    vtxc_opt = tf.get_variable('vtxc', initializer=tf.zeros_initializer(), shape=vtxc.shape)
    vtxp_opt = tf.get_variable('vtxp', initializer=tf.zeros_initializer(), shape=vtxp.shape)

    # Optimization variable setters for initialization.
    vtxc_opt_in = tf.placeholder(tf.float32, vtxc.shape)
    vtxp_opt_in = tf.placeholder(tf.float32, vtxp.shape)
    opt_set = tf.group(tf.assign(vtxc_opt, vtxc_opt_in), tf.assign(vtxp_opt, vtxp_opt_in))

    # Setup TF graph for what we optimize result.
    vtxw_opt = tf.concat([vtxp_opt, tf.ones([vtxp.shape[0], 1], tf.float32)], axis=1)
    pos_clip_opt = tf.matmul(vtxw_opt, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out_opt, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[resolution, resolution], output_db=False)
    color_opt, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_opt, col_idx)
    color_opt = dr.antialias(color_opt, rast_out_opt, pos_clip_opt, pos_idx)

    # Image-space loss and optimizer.
    loss = tf.reduce_mean((color_opt - color)**2)
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.999).minimize(loss, var_list=[vtxp_opt, vtxc_opt])

    # Setup TF graph for display.
    rast_out_disp, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_disp, col_idx)
    color_disp = dr.antialias(color_disp, rast_out_disp, pos_clip_opt, pos_idx)
    rast_out_disp_ref, _ = dr.rasterize(pos_clip, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp_ref, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out_disp_ref, col_idx)
    color_disp_ref = dr.antialias(color_disp_ref, rast_out_disp_ref, pos_clip, pos_idx)

    # Geometric error calculation
    geom_loss = tf.reduce_mean(tf.reduce_sum((tf.abs(vtxp_opt) - .5)**2, axis=1)**0.5)

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Repeats.
    for rep in range(repeats):

        # Optimize.
        ang = 0.0
        gl_avg = []
        util.init_uninitialized_vars()
        for it in range(max_iter + 1):
            # Initialize optimization.
            if it == 0:
                vtxp_init = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
                vtxc_init = np.random.uniform(0.0, 1.0, size=vtxc.shape)
                util.run(opt_set, {vtxc_opt_in: vtxc_init.astype(np.float32), vtxp_opt_in: vtxp_init.astype(np.float32)})

            # Learning rate ramp.
            lr = 1e-2
            lr = lr * max(0.01, 10**(-it*0.0005))

            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

            # Modelview and modelview + projection matrices.
            proj  = util.projection(x=0.4)
            r_mv  = np.matmul(util.translate(0, 0, -3.5), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv  = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)
        
            # Run training and measure geometric error.
            gl_val, _ = util.run([geom_loss, train_op], {mtx_in: r_mvp, lr_in: lr})
            gl_avg.append(gl_val)

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                gl_val, gl_avg = np.mean(np.asarray(gl_avg)), []
                s = ("rep=%d," % rep) if repeats > 1 else ""
                s += "iter=%d,err=%f" % (it, gl_val)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_image = imgsave_interval and (it % imgsave_interval == 0)

            if display_image or save_image:
                ang = ang + 0.1
                img_o = util.run(color_opt,      {mtx_in: r_mvp})[0]
                img_b = util.run(color,          {mtx_in: r_mvp})[0]
                img_d = util.run(color_disp,     {mtx_in: a_mvp})[0]
                img_r = util.run(color_disp_ref, {mtx_in: a_mvp})[0]

                scl = display_res // img_o.shape[0]
                img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                result_image = np.concatenate([img_o, img_b, img_d, img_r], axis=1)

            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_image:
                util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

    # All repeats done.
    if log_file:
        log_file.close()
def fit_cube(max_iter=5000,
             resolution=4,
             discontinuous=False,
             repeats=1,
             log_interval=10,
             display_interval=None,
             display_res=512,
             out_dir=None,
             log_fn=None,
             mp4save_interval=None,
             mp4save_fn=None):

    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(f'{out_dir}/{log_fn}', 'wt')
        if mp4save_interval != 0:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}',
                                        mode='I',
                                        fps=30,
                                        codec='libx264',
                                        bitrate='16M')
    else:
        mp4save_interval = None

    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], vtxp.shape[0]))

    # Create position/triangle index tensors
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    col_idx = torch.from_numpy(col_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(vtxp.astype(np.float32)).cuda()
    vtx_col = torch.from_numpy(vtxc.astype(np.float32)).cuda()

    glctx = dr.RasterizeGLContext()

    # Repeats.
    for rep in range(repeats):

        ang = 0.0
        gl_avg = []

        vtx_pos_rand = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
        vtx_col_rand = np.random.uniform(0.0, 1.0, size=vtxc.shape)
        vtx_pos_opt = torch.tensor(vtx_pos_rand,
                                   dtype=torch.float32,
                                   device='cuda',
                                   requires_grad=True)
        vtx_col_opt = torch.tensor(vtx_col_rand,
                                   dtype=torch.float32,
                                   device='cuda',
                                   requires_grad=True)

        # Adam optimizer for vertex position and color with a learning rate ramp.
        optimizer = torch.optim.Adam([vtx_pos_opt, vtx_col_opt], lr=1e-2)
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer, lr_lambda=lambda x: max(0.01, 10**(-x * 0.0005)))

        for it in range(max_iter + 1):
            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

            # Modelview and modelview + projection matrices.
            proj = util.projection(x=0.4)
            r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)

            # Compute geometric error for logging.
            with torch.no_grad():
                geom_loss = torch.mean(
                    torch.sum((torch.abs(vtx_pos_opt) - .5)**2, dim=1)**0.5)
                gl_avg.append(float(geom_loss))

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                gl_val = np.mean(np.asarray(gl_avg))
                gl_avg = []
                s = ("rep=%d," % rep) if repeats > 1 else ""
                s += "iter=%d,err=%f" % (it, gl_val)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_col, col_idx,
                           resolution)
            color_opt = render(glctx, r_mvp, vtx_pos_opt, pos_idx, vtx_col_opt,
                               col_idx, resolution)

            # Compute loss and train.
            loss = torch.mean((color - color_opt)**2)  # L2 pixel loss.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

            if display_image or save_mp4:
                ang = ang + 0.01

                img_b = color[0].cpu().numpy()
                img_o = color_opt[0].detach().cpu().numpy()
                img_d = render(glctx, a_mvp, vtx_pos_opt, pos_idx, vtx_col_opt,
                               col_idx, display_res)[0]
                img_r = render(glctx, a_mvp, vtx_pos, pos_idx, vtx_col,
                               col_idx, display_res)[0]

                scl = display_res // img_o.shape[0]
                img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                result_image = make_grid(
                    np.stack([
                        img_o, img_b,
                        img_d.detach().cpu().numpy(),
                        img_r.cpu().numpy()
                    ]))

                if display_image:
                    util.display_image(result_image,
                                       size=display_res,
                                       title='%d / %d' % (it, max_iter))
                if save_mp4:
                    writer.append_data(
                        np.clip(np.rint(result_image * 255.0), 0,
                                255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
def fit_earth(max_iter=20000,
              log_interval=10,
              display_interval=None,
              display_res=1024,
              enable_mip=True,
              res=512,
              ref_res=4096,
              lr_base=1e-2,
              lr_ramp=0.1,
              out_dir='.',
              log_fn=None,
              texsave_interval=None,
              texsave_fn=None,
              imgsave_interval=None,
              imgsave_fn=None):

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at
    # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/earth.npz') as f:
        pos_idx, pos, uv_idx, uv, tex = f.values()
    tex = tex.astype(np.float32) / 255.0
    max_mip_level = 9  # Texture is a 4x3 atlas of 512x512 maps.
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Learned texture.
    tex_var = tf.get_variable('tex',
                              initializer=tf.constant_initializer(0.2),
                              shape=tex.shape)

    # Setup TF graph for reference rendering in high resolution.
    pos_clip = tf.matmul(pos, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, rast_out_db = dr.rasterize(pos_clip, pos_idx, [ref_res, ref_res])
    texc, texd = dr.interpolate(uv[tf.newaxis, ...],
                                rast_out,
                                uv_idx,
                                rast_db=rast_out_db,
                                diff_attrs='all')
    color = dr.texture(tex[np.newaxis],
                       texc,
                       texd,
                       filter_mode='linear-mipmap-linear',
                       max_mip_level=max_mip_level)
    color = color * tf.clip_by_value(rast_out[..., -1:], 0,
                                     1)  # Mask out background.

    # Reduce the reference to correct size.
    while color.shape[1] > res:
        color = util.bilinear_downsample(color)

    # TF Graph for rendered candidate.
    if enable_mip:
        # With mipmaps.
        rast_out_opt, rast_out_db_opt = dr.rasterize(pos_clip, pos_idx,
                                                     [res, res])
        texc_opt, texd_opt = dr.interpolate(uv[tf.newaxis, ...],
                                            rast_out_opt,
                                            uv_idx,
                                            rast_db=rast_out_db_opt,
                                            diff_attrs='all')
        color_opt = dr.texture(tex_var[np.newaxis],
                               texc_opt,
                               texd_opt,
                               filter_mode='linear-mipmap-linear',
                               max_mip_level=max_mip_level)
    else:
        # No mipmaps: no image-space derivatives anywhere.
        rast_out_opt, _ = dr.rasterize(pos_clip,
                                       pos_idx, [res, res],
                                       output_db=False)
        texc_opt, _ = dr.interpolate(uv[tf.newaxis, ...], rast_out_opt, uv_idx)
        color_opt = dr.texture(tex_var[np.newaxis],
                               texc_opt,
                               filter_mode='linear')
    color_opt = color_opt * tf.clip_by_value(rast_out_opt[..., -1:], 0,
                                             1)  # Mask out background.

    # Measure only relevant portions of texture when calculating texture PSNR.
    loss = tf.reduce_mean((color - color_opt)**2)
    texmask = np.zeros_like(tex)
    tr = tex.shape[1] // 4
    texmask[tr + 13:2 * tr - 13, 25:-25, :] += 1.0
    texmask[25:-25, tr + 13:2 * tr - 13, :] += 1.0
    texloss = (tf.reduce_sum(texmask * (tex - tex_var)**2) /
               np.sum(texmask))**0.5  # RMSE within masked area.

    # Training driven by image-space loss.
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9,
                                      0.99).minimize(loss, var_list=[tex_var])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Render.
    ang = 0.0
    util.init_uninitialized_vars()
    texloss_avg = []
    for it in range(max_iter + 1):
        lr = lr_base * lr_ramp**(float(it) / float(max_iter))

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
        dist = np.random.uniform(0.0, 48.5)

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -1.5 - dist), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Run training and measure texture-space RMSE loss.
        texloss_val, _ = util.run([texloss, train_op], {
            mtx_in: r_mvp,
            lr_in: lr
        })
        texloss_avg.append(texloss_val)

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            texloss_val, texloss_avg = np.mean(np.asarray(texloss_avg)), []
            psnr = -10.0 * np.log10(texloss_val**
                                    2)  # PSNR based on average RMSE.
            s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result images/textures.
        display_image = display_interval and (it % display_interval) == 0
        save_image = imgsave_interval and (it % imgsave_interval) == 0
        save_texture = texsave_interval and (it % texsave_interval) == 0

        if display_image or save_image:
            result_image = util.run(color_opt, {mtx_in: a_mvp})[0]
        if display_image:
            util.display_image(result_image,
                               size=display_res,
                               title='%d / %d' % (it, max_iter))
        if save_image:
            util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)
        if save_texture:
            util.save_image(out_dir + '/' + (texsave_fn % it),
                            util.run(tex_var)[::-1])

    # Done.
    if log_file:
        log_file.close()
Beispiel #7
0
def fit_env_phong(max_iter          = 1000,
                  log_interval      = 10,
                  display_interval  = None,
                  display_res       = 1024,
                  res               = 1024,
                  lr_base           = 1e-2,
                  lr_ramp           = 1.0,
                  out_dir           = None,
                  log_fn            = None,
                  mp4save_interval  = None,
                  mp4save_fn        = None):

    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
        if mp4save_interval != 0:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
    else:
        mp4save_interval = None

    # Texture adapted from https://github.com/WaveEngine/Samples/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/envphong.npz') as f:
        pos_idx, pos, normals, env = f.values()
    env = env.astype(np.float32)/255.0
    env = np.stack(env)[:, ::-1].copy()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Move all the stuff to GPU.
    pos_idx = torch.as_tensor(pos_idx, dtype=torch.int32, device='cuda')
    pos = torch.as_tensor(pos, dtype=torch.float32, device='cuda')
    normals = torch.as_tensor(normals, dtype=torch.float32, device='cuda')
    env = torch.as_tensor(env, dtype=torch.float32, device='cuda')

    # Target Phong parameters.
    phong_rgb = np.asarray([1.0, 0.8, 0.6], np.float32)
    phong_exp = 25.0
    phong_rgb_t = torch.as_tensor(phong_rgb, dtype=torch.float32, device='cuda')

    # Learned variables: environment maps, phong color, phong exponent.
    env_var = torch.ones_like(env) * .5
    env_var.requires_grad_()
    phong_var_raw = torch.as_tensor(np.random.uniform(size=[4]), dtype=torch.float32, device='cuda')
    phong_var_raw.requires_grad_()
    phong_var_mul = torch.as_tensor([1.0, 1.0, 1.0, 10.0], dtype=torch.float32, device='cuda')

    # Render.
    ang = 0.0
    imgloss_avg, phong_avg = [], []
    glctx = dr.RasterizeGLContext()
    zero_tensor = torch.as_tensor(0.0, dtype=torch.float32, device='cuda')
    one_tensor = torch.as_tensor(1.0, dtype=torch.float32, device='cuda')

    # Adam optimizer for environment map and phong with a learning rate ramp.
    optimizer = torch.optim.Adam([env_var, phong_var_raw], lr=lr_base)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: lr_ramp**(float(x)/float(max_iter)))

    for it in range(max_iter + 1):
        phong_var = phong_var_raw * phong_var_mul

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

        # Modelview and modelview + projection matrices.
        proj  = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv  = np.matmul(util.translate(0, 0, -3.5), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv  = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)
        a_mvc = a_mvp
        r_mvp = torch.as_tensor(r_mvp, dtype=torch.float32, device='cuda')
        a_mvp = torch.as_tensor(a_mvp, dtype=torch.float32, device='cuda')

        # Solve camera positions.
        a_campos = torch.as_tensor(np.linalg.inv(a_mv)[:3, 3], dtype=torch.float32, device='cuda')
        r_campos = torch.as_tensor(np.linalg.inv(r_mv)[:3, 3], dtype=torch.float32, device='cuda')

        # Random light direction.        
        lightdir = np.random.normal(size=[3])
        lightdir /= np.linalg.norm(lightdir) + 1e-8
        lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')

        def render_refl(ldir, cpos, mvp):
            # Transform and rasterize.
            viewvec = pos[..., :3] - cpos[np.newaxis, np.newaxis, :] # View vectors at vertices.
            reflvec = viewvec - 2.0 * normals[np.newaxis, ...] * torch.sum(normals[np.newaxis, ...] * viewvec, -1, keepdim=True) # Reflection vectors at vertices.
            reflvec = reflvec / torch.sum(reflvec**2, -1, keepdim=True)**0.5 # Normalize.
            pos_clip = torch.matmul(pos, mvp.t())[np.newaxis, ...]
            rast_out, rast_out_db = dr.rasterize(glctx, pos_clip, pos_idx, [res, res])
            refl, refld = dr.interpolate(reflvec, rast_out, pos_idx, rast_db=rast_out_db, diff_attrs='all') # Interpolated reflection vectors.

            # Phong light.
            refl = refl / (torch.sum(refl**2, -1, keepdim=True) + 1e-8)**0.5  # Normalize.
            ldotr = torch.sum(-ldir * refl, -1, keepdim=True) # L dot R.

            # Return
            return refl, refld, ldotr, (rast_out[..., -1:] == 0)

        # Render the reflections.
        refl, refld, ldotr, mask = render_refl(lightdir, r_campos, r_mvp)

        # Reference color. No need for AA because we are not learning geometry.
        color = dr.texture(env[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color = color + phong_rgb_t * torch.max(zero_tensor, ldotr) ** phong_exp # Phong.
        color = torch.where(mask, one_tensor, color) # White background.

        # Candidate rendering same up to this point, but uses learned texture and Phong parameters instead.
        color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3] # Phong.
        color_opt = torch.where(mask, one_tensor, color_opt) # White background.

        # Compute loss and train.
        loss = torch.mean((color - color_opt)**2) # L2 pixel loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Collect losses.
        imgloss_avg.append(loss.detach().cpu().numpy())
        phong_avg.append(phong_var.detach().cpu().numpy())

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            imgloss_val, imgloss_avg = np.mean(np.asarray(imgloss_avg, np.float32)), []
            phong_val, phong_avg = np.mean(np.asarray(phong_avg, np.float32), axis=0), []
            phong_rgb_rmse = np.mean((phong_val[:3] - phong_rgb)**2)**0.5
            phong_exp_rel_err = np.abs(phong_val[3] - phong_exp)/phong_exp
            s = "iter=%d,phong_rgb_rmse=%f,phong_exp_rel_err=%f,img_rmse=%f" % (it, phong_rgb_rmse, phong_exp_rel_err, imgloss_val)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result image.        
        display_image = display_interval and (it % display_interval == 0)
        save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

        if display_image or save_mp4:
            lightdir = np.asarray([.8, -1., .5, 0.0])
            lightdir = np.matmul(a_mvc, lightdir)[:3]
            lightdir /= np.linalg.norm(lightdir)
            lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')
            refl, refld, ldotr, mask = render_refl(lightdir, a_campos, a_mvp)
            color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
            color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3]
            color_opt = torch.where(mask, one_tensor, color_opt)
            result_image = color_opt.detach()[0].cpu().numpy()
            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_mp4:
                writer.append_data(np.clip(np.rint(result_image*255.0), 0, 255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
def fit_env_phong(max_iter=1000,
                  log_interval=10,
                  display_interval=None,
                  display_res=1024,
                  res=1024,
                  lr_base=1e-2,
                  lr_ramp=1.0,
                  out_dir='.',
                  log_fn=None,
                  imgsave_interval=None,
                  imgsave_fn=None):

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Texture adapted from https://github.com/WaveEngine/Samples/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/envphong.npz') as f:
        pos_idx, pos, normals, env = f.values()
    env = env.astype(np.float32) / 255.0
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Target Phong parameters.
    phong_rgb = np.asarray([1.0, 0.8, 0.6], np.float32)
    phong_exp = 25.0

    # Inputs to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])
    invmtx_in = tf.placeholder(tf.float32, [4, 4])  # Inverse.
    campos_in = tf.placeholder(tf.float32,
                               [3])  # Camera position in world space.
    lightdir_in = tf.placeholder(tf.float32, [3])  # Light direction.

    # Learned variables: environment maps, phong color, phong exponent.
    env_var = tf.get_variable('env_var',
                              initializer=tf.constant_initializer(0.5),
                              shape=env.shape)
    phong_var_raw = tf.get_variable('phong_var',
                                    initializer=tf.random_uniform_initializer(
                                        0.0, 1.0),
                                    shape=[4])  # R, G, B, exp.
    phong_var = phong_var_raw * [1.0, 1.0, 1.0, 10.0
                                 ]  # Faster learning rate for the exponent.

    # Transform and rasterize.
    viewvec = pos[..., :3] - campos_in[
        np.newaxis, np.newaxis, :]  # View vectors at vertices.
    reflvec = viewvec - 2.0 * normals[tf.newaxis, ...] * tf.reduce_sum(
        normals[tf.newaxis, ...] * viewvec, axis=-1,
        keepdims=True)  # Reflection vectors at vertices.
    reflvec = reflvec / tf.reduce_sum(reflvec**2, axis=-1,
                                      keepdims=True)**0.5  # Normalize.
    pos_clip = tf.matmul(pos, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, rast_out_db = dr.rasterize(pos_clip, pos_idx, [res, res])
    refl, refld = dr.interpolate(
        reflvec, rast_out, pos_idx, rast_db=rast_out_db,
        diff_attrs='all')  # Interpolated reflection vectors.

    # Phong light.
    refl = refl / tf.reduce_sum(refl**2, axis=-1,
                                keepdims=True)**0.5  # Normalize.
    ldotr = tf.reduce_sum(-lightdir_in * refl, axis=-1,
                          keepdims=True)  # L dot R.

    # Reference color. No need for AA because we are not learning geometry.
    env = np.stack(env)[:, ::-1]
    color = dr.texture(env[np.newaxis, ...],
                       refl,
                       refld,
                       filter_mode='linear-mipmap-linear',
                       boundary_mode='cube')
    color = tf.reduce_sum(tf.stack(color), axis=0)
    color = color + phong_rgb * tf.maximum(0.0, ldotr)**phong_exp  # Phong.
    color = tf.maximum(
        color,
        1.0 - tf.clip_by_value(rast_out[..., -1:], 0, 1))  # White background.

    # Candidate rendering same up to this point, but uses learned texture and Phong parameters instead.
    color_opt = dr.texture(env_var[tf.newaxis, ...],
                           refl,
                           uv_da=refld,
                           filter_mode='linear-mipmap-linear',
                           boundary_mode='cube')
    color_opt = tf.reduce_sum(tf.stack(color_opt), axis=0)
    color_opt = color_opt + phong_var[:3] * tf.maximum(
        0.0, ldotr)**phong_var[3]  # Phong.
    color_opt = tf.maximum(
        color_opt,
        1.0 - tf.clip_by_value(rast_out[..., -1:], 0, 1))  # White background.

    # Training.
    loss = tf.reduce_mean((color - color_opt)**2)  # L2 pixel loss.
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.99).minimize(
        loss, var_list=[env_var, phong_var_raw])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Render.
    ang = 0.0
    util.init_uninitialized_vars()
    imgloss_avg, phong_avg = [], []
    for it in range(max_iter + 1):
        lr = lr_base * lr_ramp**(float(it) / float(max_iter))

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Solve camera positions.
        a_campos = np.linalg.inv(a_mv)[:3, 3]
        r_campos = np.linalg.inv(r_mv)[:3, 3]

        # Random light direction.
        lightdir = np.random.normal(size=[3])
        lightdir /= np.linalg.norm(lightdir) + 1e-8

        # Run training and measure image-space RMSE loss.
        imgloss_val, phong_val, _ = util.run(
            [loss, phong_var, train_op], {
                mtx_in: r_mvp,
                invmtx_in: np.linalg.inv(r_mvp),
                campos_in: r_campos,
                lightdir_in: lightdir,
                lr_in: lr
            })
        imgloss_avg.append(imgloss_val**0.5)
        phong_avg.append(phong_val)

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            imgloss_val, imgloss_avg = np.mean(
                np.asarray(imgloss_avg, np.float32)), []
            phong_val, phong_avg = np.mean(np.asarray(phong_avg, np.float32),
                                           axis=0), []
            phong_rgb_rmse = np.mean((phong_val[:3] - phong_rgb)**2)**0.5
            phong_exp_rel_err = np.abs(phong_val[3] - phong_exp) / phong_exp
            s = "iter=%d,phong_rgb_rmse=%f,phong_exp_rel_err=%f,img_rmse=%f" % (
                it, phong_rgb_rmse, phong_exp_rel_err, imgloss_val)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result image.
        display_image = display_interval and (it % display_interval == 0)
        save_image = imgsave_interval and (it % imgsave_interval == 0)

        if display_image or save_image:
            result_image = util.run(
                color_opt, {
                    mtx_in: a_mvp,
                    invmtx_in: np.linalg.inv(a_mvp),
                    campos_in: a_campos,
                    lightdir_in: lightdir
                })[0]
        if display_image:
            util.display_image(result_image,
                               size=display_res,
                               title='%d / %d' % (it, max_iter))
        if save_image:
            util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

    # Done.
    if log_file:
        log_file.close()