class CubeSphereComparisonGenerator(object):
    """Renders a cube and a near-unit sphere from the same random camera."""

    def __init__(self, width, height):
        self.cube = Renderer(width, height, "cube", False)
        self.sphere = Renderer(width, height, "sphere", True)
        self.shape = np.ones(160) * 0.9

    def sample(self):
        cam = np.random.uniform(-1, 1, 3)
        return self.cube.render(np.zeros(160), cam), self.sphere.render(self.shape, cam)
class CubeGenerator(object):
    def __init__(self, width, height):
        self.renderer = Renderer(width, height, "cube", False)

    def sample(self):
        return self.renderer.render(np.zeros(160), np.random.uniform(-1, 1, 3))
class RotatingConstantShapeGenerator(object):
    """Renders a constant-radius sphere from a random camera rotation."""

    def __init__(self, width, height, radius=.5):
        self.renderer = Renderer(width, height, "sphere", True)
        self.shape = np.ones(160) * radius

    def sample(self):
        self.cam = np.random.uniform(-1, 1, 3)
        return self.renderer.render(self.shape, self.cam)
class RotatingCubeGenerator(object):
    def __init__(self, width, height):
        self.renderer = Renderer(width, height, "cube", False)
        self.cam = None

    def sample(self):
        self.cam = np.random.uniform(-1, 1, 3)
        return self.renderer.render(np.zeros(160), self.cam)
class RotatingRandomShapeGenerator(object):
    def __init__(self, width, height, smin=.4, smax=.8):
        self.renderer = Renderer(width, height, "sphere", True)
        self.shape = np.random.uniform(smin, smax, 160)
        self.cam = None

    def sample(self):
        self.cam = np.random.uniform(-1, 1, 3)
        return self.renderer.render(self.shape, self.cam)
class RandomSingleViewGenerator(object):
    """Renders a fresh random shape on each sample from one fixed random camera."""

    def __init__(self, width, height, smin=0, smax=1):
        self.renderer = Renderer(width, height, "sphere", True)
        self.cam = np.random.uniform(-1, 1, 3)
        self.min = smin
        self.max = smax

    def sample(self):
        return self.renderer.render(
            np.random.uniform(self.min, self.max, 160), self.cam)
class RotatingSingle3DIQTTGenerator(object):
    """Renders one fixed random IQTT shape; the camera is random unless supplied."""

    def __init__(self, width, height, smin=.5, smax=1):
        self.renderer = Renderer(width, height, "iqtt", True)
        self.shape = np.random.uniform(smin, smax, 160)

    def sample(self, cam=None):
        if cam is None:
            self.cam = np.random.uniform(-1, 1, 3)
        else:
            self.cam = cam
        return self.renderer.render(self.shape, self.cam)
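# A minimal usage sketch for the generators above (the 128x128 resolution is an
# arbitrary choice; Renderer and np are assumed to be imported as elsewhere in
# this code):
#
#   gen = RotatingRandomShapeGenerator(128, 128)
#   img = gen.sample()                                       # random shape, random camera
#   img_same_view = gen.renderer.render(gen.shape, gen.cam)  # re-render the last view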
target = data_generator.sample()
env_state = np.ones(160, dtype=np.float32) * .5
env_rot = np.zeros(3, dtype=np.float32)

state = torch.Tensor([np.swapaxes(npa(target), 0, 2)])  # channels-first, batch of 1
if torch.cuda.is_available():
    state = state.cuda()

entropies = []
log_probs = []
rewards = []
reward_raw_log = []  # just for logging purposes

for t in range(NUM_STEPS):
    action, log_prob, entropy = agent.select_action(state)
    action = action.cpu()

    next_state = env.render(action[0], data_generator.cam)
    # np.linalg.norm already returns a scalar, so no further reduction is needed
    reward_raw = -np.linalg.norm(npa(target) - npa(next_state))
    reward_raw_log.append(reward_raw)

    # subtract a running-mean baseline over the last REWARD_BUF raw rewards
    if len(reward_avg) == 0:
        reward = reward_raw
    else:
        reward = reward_raw - np.mean(reward_avg)

    # update running mean
    reward_avg.append(reward_raw)
    if len(reward_avg) > REWARD_BUF:
        reward_avg.pop(0)

    rewards.append(reward)
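# The running-mean baseline above could equivalently be kept in a
# collections.deque with maxlen=REWARD_BUF, which trims itself as new rewards
# arrive; a sketch of that alternative (same behavior, not the code used here):
#
#   from collections import deque
#   reward_avg = deque(maxlen=REWARD_BUF)
#   ...
#   reward = reward_raw - (np.mean(reward_avg) if reward_avg else 0.0)
#   reward_avg.append(reward_raw)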
# differential entropy of a diagonal Gaussian: 0.5 * (log(2*pi*sigma^2) + 1)
entropy = 0.5 * ((2 * np.pi * latent_variance).log() + 1)
log_probs.append(log_prob)
entropies.append(entropy)

# vertex_params = policy.decode(action).detach().view(-1).cpu().numpy()
vertex_params = action.detach().view(-1).cpu().numpy()

experiment.log_metric("vertices mean", np.mean(vertex_params))
experiment.log_metric("vertices min", np.min(vertex_params))
experiment.log_metric("vertices max", np.max(vertex_params))

# render out an image for each of the K samples
# IMPORTANT: THIS CURRENTLY ASSUMES BATCH SIZE = 1
next_state = env.render(vertex_params, data_generator.cam)
next_state = npa(next_state, dtype=np.float32) / 255

# calculate the reward for each of the K samples
reward_raw = -F.binary_cross_entropy(
    torch.tensor(next_state, requires_grad=False).float(),
    torch.tensor(state_raw, requires_grad=False).float(),
    reduction='sum')
rewards_raw.append(reward_raw.item())  # store as a plain float

# subtract the mean reward over all K samples (variance reduction)
rewards = npa(rewards_raw) - np.mean(rewards_raw)
returns = torch.tensor(rewards).float().to(device)
rewards = []
rewards_raw = []
log_probs = []

for k in range(SAMPLES):  # sample K times
    m = Normal(latent_mu, latent_stddev)
    action = m.rsample()
    log_probs.append(m.log_prob(action))
    params = policy.decode(action)

    # render out an image for each of the K samples
    # IMPORTANT: THIS CURRENTLY ASSUMES BATCH SIZE = 1
    next_state = env.render(params.detach().view(-1).cpu().numpy(),
                            data_generator.cam)

    # calculate the reward (negative MSE) for each of the K samples
    reward_raw = -(np.square(npa(state_raw) - npa(next_state))).mean(axis=None)
    rewards_raw.append(reward_raw)

# subtract the average reward of the other K-1 samples
# (leave-one-out baseline, for variance reduction)
for k in range(SAMPLES):
    baseline = np.mean(rewards_raw[:k] + rewards_raw[k + 1:])
    rewards.append(rewards_raw[k] - baseline)

# additional VAE loss: KLD = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
KLD = -0.5 * torch.sum(1 + torch.log(latent_stddev.pow(2))
                       - latent_mu.pow(2) - latent_stddev.pow(2))
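# A sketch of how these pieces would typically be assembled into a loss for the
# reparameterized REINFORCE update (BETA, optimizer, and the averaging over
# SAMPLES are assumptions, not taken from the snippet above):
#
#   returns = torch.tensor(rewards).float().to(device)
#   policy_loss = 0
#   for log_prob, R in zip(log_probs, returns):
#       policy_loss = policy_loss - log_prob.sum() * R  # ascend expected reward
#   loss = policy_loss / SAMPLES + BETA * KLD           # add the VAE regularizer
#   optimizer.zero_grad()
#   loss.backward()
#   optimizer.step()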
class ShapeConfig:
    def __init__(self):
        self.rotation_speed = 1  # higher is faster, positive is right
        self.cam_z = -2
        self.values = []

        # 7 columns of 23 sliders, minus the last 3 rows of the final column,
        # gives the 160 shape parameters
        for x in range(7):
            for r in range(23):
                if x == 6 and r > 21:
                    break
                Label(text=r + 1 + (x * 23), relief=RIDGE,
                      width=5).grid(row=r, column=0 + (x * 2))
                s = Scale(master, from_=0., to=1., resolution=0.1,
                          orient=HORIZONTAL)
                s.set(1)
                s.grid(row=r, column=1 + (x * 2))  # length=10,
                self.values.append(s)

        Button(master, text='max', command=self.max).grid(
            row=16, column=14, columnspan=2)
        Button(master, text='randomize', command=self.randomize).grid(
            row=17, column=14, columnspan=2)
        Button(master, text='rotate right', command=self.right).grid(
            row=18, column=14, columnspan=2)
        Button(master, text='rotate left', command=self.left).grid(
            row=19, column=14, columnspan=2)
        Button(master, text='cam up', command=self.cam_up).grid(
            row=20, column=14, columnspan=2)
        Button(master, text='cam down', command=self.cam_down).grid(
            row=21, column=14, columnspan=2)
        Button(master, text='print config', command=self.print_config).grid(
            row=22, column=14, columnspan=2)

        self.b = Button(master, text="enter values", command=self.popup)
        self.b.grid(row=17, column=14, columnspan=2)

        width = 512
        height = 512
        self.renderer = Renderer(width, height)
        self.image = Image.fromarray(np.zeros((width, height), dtype=np.uint8))
        self.canvas = Canvas(master, height=height, width=width)
        self.canvas.grid(row=0, column=14, rowspan=15)
        # image = image.resize((basewidth, hsize), PIL.Image.ANTIALIAS)
        self.photo = ImageTk.PhotoImage(self.image)
        self.photo_holder = self.canvas.create_image(
            width - (self.image.size[0] / 2),
            height - (self.image.size[1] / 2),
            image=self.photo)

        self.rot = 0
        self.render()

    def popup(self):
        self.w = popupWindow(master)
        self.b["state"] = "disabled"
        master.wait_window(self.w.top)
        self.b["state"] = "normal"
        x = ast.literal_eval(self.entryValue())
        self.set_values(x)

    def entryValue(self):
        return self.w.value

    def set_values(self, vs):
        if len(vs) != 160:
            print("ERROR: length of inputs should be 160, found:", len(vs))
            return
        for i in range(160):
            self.values[i].set(vs[i])

    def max(self):
        self.set_values([1.] * 160)

    def randomize(self):
        self.set_values(np.random.uniform(.5, 1, 160).tolist())

    def right(self):
        if self.rotation_speed < 5:
            self.rotation_speed += 1
        if self.rotation_speed == 1:
            self.render()

    def left(self):
        if self.rotation_speed > -5:
            self.rotation_speed -= 1
        if self.rotation_speed == -1:
            self.render()
        print(self.rotation_speed)

    def render(self):
        self.image = self.renderer.render(self.get_values(),
                                          np.array((0, self.rot, 0)))
        self.photo = ImageTk.PhotoImage(self.image)
        self.canvas.itemconfig(self.photo_holder, image=self.photo)
        self.rot += 0.1 * np.sign(self.rotation_speed) / (2 * np.pi)
        if self.rotation_speed != 0:
            # faster rotation -> shorter delay between frames
            master.after(25 * (6 - abs(self.rotation_speed)), self.render)

    def get_values(self):
        return [m.get() for m in self.values]

    def update_img(self, event):
        pass

    def print_config(self):
        print(self.get_values())

    def cam_up(self):
        self.cam_z += 1

    def cam_down(self):
        self.cam_z -= 1

    def step1(self):
        self.image = Image.open("ball.gif")
        self.photo = ImageTk.PhotoImage(self.image)
        self.canvas.itemconfig(self.photo_holder, image=self.photo)
cube_generator = CubeGenerator(WIDTH, HEIGHT)

torch.manual_seed(SEED)
np.random.seed(SEED)

agent = REINFORCE(HIDDEN_SIZE, WIDTH * HEIGHT * 3, 163, Policy)

ckpt_dir = 'ckpt_3dreinforcev1'
if not os.path.exists(ckpt_dir):
    os.mkdir(ckpt_dir)

for i_episode in range(NUM_EPISODES):
    target = cube_generator.sample()
    env_state = np.ones(160, dtype=np.float32) * .5
    env_rot = np.zeros(3, dtype=np.float32)
    state = torch.Tensor([npa(target) - npa(env.render(env_state, env_rot))]).view(-1)

    entropies = []
    log_probs = []
    rewards = []

    for t in range(NUM_STEPS):
        action, log_prob, entropy = agent.select_action(state)
        action = action.cpu()

        env_state = env_state + ALPHA_STATE * action[:160]
        # convert the rotation part of the action from [0, 1] to [-1, 1]
        env_rot = env_rot + ALPHA_ROT * (action[160:] * 2 - 1)

        next_state = env.render(env_state, env_rot)
        reward = -np.linalg.norm(npa(target) - npa(next_state))
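# A sketch of the periodic checkpointing that ckpt_dir suggests; the save
# cadence and the agent.model attribute are assumptions, not taken from the
# code above:
#
#   if i_episode % 100 == 0:
#       torch.save(agent.model.state_dict(),
#                  os.path.join(ckpt_dir, 'policy_ep{}.pt'.format(i_episode)))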
# vertex_params = policy.decode(action).detach().view(-1).cpu().numpy()
vertex_params = action.detach().view(-1).cpu().numpy()

if LOGGING:
    wandb.log({
        "vertices mean": np.mean(vertex_params),
        "vertices min": np.min(vertex_params),
        "vertices max": np.max(vertex_params)
    })

vertex_params = np.clip(vertex_params, 0, 1)

# render out an image for each of the K samples
# IMPORTANT: THIS CURRENTLY ASSUMES BATCH SIZE = 1
next_state = env.render(vertex_params, fixed_cam,
                        cam_pos=data_generator.cam + .7)
next_state = npa(next_state, dtype=np.float32) / 255
# next_state = make_greyscale(npa(next_state, dtype=np.float32))

# calculate the reward for each of the K samples
reward_raw = -F.binary_cross_entropy(
    torch.tensor(next_state, requires_grad=False).permute(2, 0, 1).float(),
    state[0, :, :, :].float(),
    reduction='sum')
rewards_raw.append(reward_raw.item())  # store as a plain float

# subtract the mean reward over all K samples (variance reduction)
rewards = npa(rewards_raw) - np.mean(rewards_raw)
from threedee_tools.datasets import CubeLoader
from threedee_tools.renderer import Renderer
import numpy as np
import matplotlib.pyplot as plt

env = Renderer(128, 128, shape="ijcv")
gen = CubeLoader()

imga = gen.sample()
print(gen.cam)
print(gen.light)

# match the renderer's lighting to the dataset sample
env.base_light = -gen.light + 1
imgb = env.render(np.ones(160), np.array([0, 0, 0]), cam_pos=gen.cam + .7)
imgb = np.array(imgb, dtype=np.float32) / 255

# show the dataset sample and the rendered image side by side
imgab = np.zeros((128, 128 * 2, 3), dtype=np.float32)
imgab[:, :128, :] = imga
imgab[:, 128:, :] = imgb

plt.imshow(imgab)
plt.show()
# vertex_params = policy.decode(action).detach().view(-1).cpu().numpy()
vertex_params = action.detach().view(-1).cpu().numpy()

if LOGGING:
    wandb.log({
        "vertices mean": np.mean(vertex_params),
        "vertices min": np.min(vertex_params),
        "vertices max": np.max(vertex_params)
    })

vertex_params = np.clip(vertex_params, 0, 1)

# render out an image for each of the K samples
# IMPORTANT: THIS CURRENTLY ASSUMES BATCH SIZE = 1
next_state = env.render(vertex_params, fixed_cam)
next_state = make_greyscale(npa(next_state, dtype=np.float32))

# calculate the reward for each of the K samples
reward_raw = -F.binary_cross_entropy(
    torch.tensor(next_state, requires_grad=False).permute(2, 0, 1).float(),
    state[0, :, :, :].float(),
    reduction='sum')
rewards_raw.append(reward_raw.item())  # store as a plain float

# subtract the mean reward over all K samples (variance reduction)
rewards = npa(rewards_raw) - np.mean(rewards_raw)
returns = torch.tensor(rewards).float().to(device)