def test_wrapper():
    env = gym.make('Pong-v0')
    env = gym_wrappers.RewardCountLimit(env, max_reward_count=5)
    obs = env.reset()
    env.render()
    done = False
    while not done:
        obs, r, done, info = env.step(env.action_space.sample())
        env.render()
def test_atari():
    env = gym.make('PongNoFrameskip-v4')
    env = w.atari_preprocessing.AtariPreprocessing(env.unwrapped, terminal_on_life_loss=True)
    obs = env.reset()
    env.render()
    done = False
    while not done:
        obs, r, done, info = env.step(env.action_space.sample())
        env.render()
def test_expectation_my_multivariate_norm():
    xrange = range(1, 50)
    analyticE = [expect_multivariate_norm(N) for N in xrange]
    E = []
    for N in xrange:
        s, z = cma_es.sample(20, 1.0, torch.zeros(N), torch.eye(N), torch.eye(N))
        E.append(sum([n.norm().item() for n in z.unbind(0)]) / 20)
    plt.plot(xrange, E, label='empirical')
    plt.plot(xrange, analyticE, label='analytic')
    plt.legend(loc='upper left')
    plt.show()
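
# `expect_multivariate_norm(N)` above comes from the project's cma_es helpers.
# The quantity it should return is E||N(0, I_N)||, which CMA-ES calls chiN.
# A minimal sketch, assuming it uses the standard approximation from Hansen's
# CMA-ES tutorial (hypothetical reimplementation, not necessarily the project's code):
def _expect_multivariate_norm_sketch(N):
    # E||z|| for z ~ N(0, I_N), approximated as sqrt(N) * (1 - 1/(4N) + 1/(21N^2))
    return N ** 0.5 * (1.0 - 1.0 / (4 * N) + 1.0 / (21 * N ** 2))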
def test_pong():
    v = UniImageViewer()
    l = UniImageViewer(title='processed', screen_resolution=(32, 32))
    env = gym.make('Pong-v0')
    s = env.reset()
    done = False
    while not done:
        s, r, done, info = env.step(env.action_space.sample())
        v.render(s)
        s = d.pong_color_prepro(s)
        # s = cv2.cvtColor(s, cv2.COLOR_RGB2GRAY)
        # s = s[34:168, :]
        # s = cv2.resize(s, dsize=(32, 32), interpolation=cv2.INTER_AREA)
        l.render(s)
def test_patch():
    args = config.config(['--config', '../configs/cma_es/exp2/baseline.yaml'])
    torch.manual_seed(0)
    datapack = keypoints.ds.datasets.datasets[args.dataset]
    env = gym.make(datapack.env)
    env = gym_wrappers.RewardCountLimit(env, 5)
    done = False
    env.reset()
    transporter_net = transporter.make(args, map_device='cpu')
    view = main.Keypoints(transporter_net)
    while not done:
        s, r, done, info = env.step(env.action_space.sample())
        s = datapack.prepro(s)
        s_t = datapack.transforms(s).unsqueeze(0)
        kp = view(s_t)
        print(kp)
        env.render()
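
# The optimisation tests below all draw parameters through
# `s, z = cma_es.sample(n, step_size, mean, b, d)`. A hedged sketch of the sampling
# step it presumably performs (hypothetical; the project's implementation may
# differ): draw z in the isotropic z-space, then map into parameter space through
# the eigendecomposition C = (B D)(B D)^T and shift by the mean.
def _cma_sample_sketch(n, step_size, mean, b, d):
    z = torch.randn(n, mean.size(0))                   # z ~ N(0, I), one row per sample
    s = mean + step_size * z.matmul(b.matmul(d).t())   # s_i = mean + sigma * B D z_i
    return s, z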
def test_hyperparams():
    objective_f = akley
    features = 2
    step_size = 1.0
    epochs = 1e3 * features ** 2

    # selection settings
    samples = 4 + floor(3 * log(features))
    mu = samples / 2
    weights = torch.tensor([log(mu + 0.5)]) - torch.linspace(start=1, end=mu, steps=floor(mu)).log()
    weights = weights / weights.sum()
    mu = floor(mu)
    mueff = (weights.sum() ** 2 / (weights ** 2).sum()).item()

    '''
    reference formulas (MATLAB):
    cc = (4 + mueff / N) / (N + 4 + 2 * mueff / N);
    cs = (mueff + 2) / (N + mueff + 5);
    c1 = 2 / ((N + 1.3)^2 + mueff);
    cmu = 2 * (mueff - 2 + 1 / mueff) / ((N + 2)^2 + 2 * mueff / 2);
    damps = 1 + 2 * max(0, sqrt((mueff - 1) / (N + 1)) - 1) + cs;
    '''

    # adaptation settings
    cc = (4 + mueff / features) / (features + 4 + 2 * mueff / features)
    cs = (mueff + 2) / (features + mueff + 5)
    c1 = 2 / ((features + 1.3) ** 2 + mueff)
    cmu = 2 * (mueff - 2 + 1 / mueff) / ((features + 2) ** 2 + 2 * mueff / 2)
    damps = 1 + 2 * max(0.0, sqrt((mueff - 1.0) / (features + 1)) - 1) + cs
    chiN = expect_multivariate_norm(features)

    mean = torch.zeros(features)
    b = torch.eye(features)
    d = torch.eye(features)
    c = torch.matmul(b.matmul(d), b.matmul(d).T)
    pc = torch.zeros(features)
    ps = torch.zeros(features)

    print(f'mu: {mu}, mueff: {mueff}, cc: {cc}, cs: {cs}, c1: {c1}, cmu: {cmu}, damps: {damps}, chiN: {chiN}')

    plt.title('weights')
    plt.plot(weights)
    print(weights)
    plt.show()

    step_size_l = [step_size]
    correlation_l = [1.0]
    ps_l = [ps[0].item()]
    fitness_l = [0]
    plot_freq = 1

    for counteval in range(1, 10):
        # sample parameters
        s, z = cma_es.sample(samples, step_size, mean, b, d)

        # rank by fitness
        f = objective_f(s[:, 0], s[:, 1])
        g = [{'sample': s[i], 'z': z[i], 'fitness': f.item()} for i, f in enumerate(f)]
        g = sorted(g, key=lambda x: x['fitness'], reverse=True)
        g = g[0:mu]
        fitness_l.append(g[0]['fitness'])
        z = torch.stack([g['z'] for g in g])
        g = torch.stack([g['sample'] for g in g])

        if counteval % plot_freq == 0:
            plot_heatmap('sample ', counteval, mean, b, d, samples=s, g=g, chiN=chiN, step_size=step_size)

        # backup
        mean_prev = mean.clone()
        prev_cov = c.clone()
        g_raw = g.clone()

        mean = (g * weights.unsqueeze(1)).sum(0)
        zmean = (z * weights.unsqueeze(1)).sum(0)

        # step size
        ps = (1 - cs) * ps + sqrt(cs * (2.0 - cs)) * b.matmul(zmean)
        correlation = ps.norm() / chiN
        ps_l.append(ps[0].item())
        correlation_l.append(correlation.item())

        # delay the introduction of the rank 1 update
        denominator = sqrt(1 - (1 - cs) ** (2 * counteval / samples))
        threshold = 1.4 + 2 / (features + 1)
        hsig = correlation / denominator < threshold
        hsig = 1.0 if hsig else 0.0

        step_size = step_size * ((cs / damps) * (correlation - 1.0)).exp()
        step_size_l.append(step_size)

        # a mind-bending way to write an exponentially smoothed moving average:
        # zmean contains neither step size nor mean, which lets us add together
        # updates taken at different step sizes
        pc = (1 - cc) * pc + hsig * sqrt(cc * (2.0 - cc) * mueff) * b.matmul(d).matmul(zmean)

        # which we then combine into a covariance matrix from 1 (mean) datapoint!
        # this is why it's called a "rank 1" update
        pc_cov = pc.unsqueeze(1).matmul(pc.unsqueeze(1).t())
        # mix back in the old covariance if hsig == 0
        pc_cov = pc_cov + (1 - hsig) * cc * (2 - cc) * prev_cov

        # estimate cov for all selected samples (weighted by rank)
        bdz = b.matmul(d).matmul(z.t())
        cmu_cov = torch.matmul(bdz, weights.diag_embed())
        cmu_cov = cmu_cov.matmul(bdz.t())

        c = (1.0 - c1 - cmu) * prev_cov + (c1 * pc_cov) + (cmu * cmu_cov)

        # pull out the eigenthings and do the business
        d, b = torch.symeig(c, eigenvectors=True)
        d = d.sqrt().diag_embed()

        if counteval % plot_freq == 0:
            plot_heatmap('select', counteval, mean, b, d, g=g_raw, chiN=chiN, step_size=step_size)
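
# test_hyperparams ranks samples by `akley(x, y)` in descending order, so the
# objective is presumably a negated 2-D Ackley function (global optimum at the
# origin). A hypothetical stand-in, assuming that convention (the project's
# `akley` may differ):
def _akley_sketch(x, y):
    import math
    term1 = -20.0 * torch.exp(-0.2 * torch.sqrt(0.5 * (x ** 2 + y ** 2)))
    term2 = -torch.exp(0.5 * (torch.cos(2 * math.pi * x) + torch.cos(2 * math.pi * y)))
    return -(term1 + term2 + 20.0 + math.e)  # negated: higher fitness is better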
def test_rank_mu_and_rank_one_update_with_step_size_control():
    features = 2
    step_size = 1.0
    epochs = 1e3 * features ** 2

    # selection settings
    samples = 4 + floor(3 * log(features))
    mu = samples / 2
    weights = log(mu + 0.5) - torch.linspace(start=1, end=mu, steps=floor(mu)).log()
    weights = weights / weights.sum()
    mu = floor(mu)
    mueff = (weights.sum() ** 2 / (weights ** 2).sum()).item()

    '''
    reference formulas (MATLAB):
    cc = (4 + mueff / N) / (N + 4 + 2 * mueff / N);
    cs = (mueff + 2) / (N + mueff + 5);
    c1 = 2 / ((N + 1.3)^2 + mueff);
    cmu = 2 * (mueff - 2 + 1 / mueff) / ((N + 2)^2 + 2 * mueff / 2);
    damps = 1 + 2 * max(0, sqrt((mueff - 1) / (N + 1)) - 1) + cs;
    '''

    # adaptation settings (defaults commented out, replaced with hand-tuned values)
    # cmu = mueff / features ** 2
    cc = (4 + mueff / features) / (features + 4 + 2 * mueff / features)
    # cs = (mueff + 2) / (features + mueff + 5)
    cs = 0.95
    # c1 = 2 / ((features + 1.3) ** 2 + mueff)
    c1 = 0.3
    # cmu = 2 * (mueff - 2 + 1 / mueff) / ((features + 2)**2 + 2 * mueff / 2)
    cmu = 0.3
    # damps = 1 + 2 * max(0.0, sqrt((mueff - 1.0) / (features + 1)) - 1) + cs
    damps = 1.0
    chiN = expect_multivariate_norm(features)

    print(f'cc: {cc}, cs: {cs}, c1: {c1}, cmu: {cmu}, damps: {damps}, chiN: {chiN}')

    plt.title('weights')
    plt.plot(weights)
    plt.show()

    mean = torch.zeros(features)
    b = torch.eye(features)
    d = torch.eye(features)
    c = torch.matmul(b.matmul(d), b.matmul(d).T)
    pc = torch.zeros(features)
    ps = torch.zeros(features)

    for counteval in range(8):
        # sample parameters
        s, z = cma_es.sample(samples, step_size, mean, b, d)

        # rank by fitness
        f = spike(s[:, 0], s[:, 1])
        g = [{'sample': s[i], 'z': z[i], 'fitness': f.item()} for i, f in enumerate(f)]
        g = sorted(g, key=lambda x: x['fitness'], reverse=True)
        g = g[0:mu]
        z = torch.stack([g['z'] for g in g])
        g = torch.stack([g['sample'] for g in g])

        plot_heatmap('sample ', counteval, mean, b, d, samples=s, g=g, chiN=chiN)

        # backup
        mean_prev = mean.clone()
        c_prev = c.clone()
        g_raw = g.clone()

        mean = (g * weights.unsqueeze(1)).sum(0)
        zmean = (z * weights.unsqueeze(1)).sum(0)

        # step size
        ps = (1 - cs) * ps + cs * b.matmul(zmean)
        step_size = step_size * ((cs / damps) * (ps.norm() / chiN - 1.0)).exp()

        # a mind-bending way to write an exponentially smoothed moving average:
        # zmean contains neither step size nor mean, which lets us add together
        # updates taken at different step sizes
        pc = (1 - cc) * pc + cc * b.matmul(d).matmul(zmean)

        # which we then combine into a covariance matrix from 1 (mean) datapoint!
        # this is why it's called a "rank 1" update
        cov_pc = pc.unsqueeze(1).matmul(pc.unsqueeze(1).t())

        # estimate cov for all selected samples (weighted by rank)
        bdz = b.matmul(d).matmul(z.t())
        cmu_cov = torch.matmul(bdz, weights.diag_embed())
        cmu_cov = cmu_cov.matmul(bdz.t())

        c = (1.0 - c1 - cmu) * c_prev + c1 * cov_pc + cmu * cmu_cov

        # pull out the eigenthings and do the business
        d, b = torch.symeig(c, eigenvectors=True)
        d = d.sqrt().diag_embed()

        plot_heatmap('select', counteval, mean, b, d, g=g_raw, chiN=chiN, step_size=step_size)
        time.sleep(0.5)
def test_rank_one_update():
    features = 2
    step_size = 1.0
    epochs = 1e3 * features ** 2

    # selection settings
    samples = 4 + floor(3 * log(features))
    mu = samples / 2
    weights = log(mu + 0.5) + torch.linspace(start=1, end=mu, steps=floor(mu)).log()
    weights = torch.flip(weights, dims=(0,)) / weights.sum()
    mu = floor(mu)
    mueff = (weights.sum() ** 2 / (weights ** 2).sum()).item()

    '''
    reference formulas (MATLAB):
    cc = (4 + mueff / N) / (N + 4 + 2 * mueff / N);
    cs = (mueff + 2) / (N + mueff + 5);
    c1 = 2 / ((N + 1.3)^2 + mueff);
    cmu = 2 * (mueff - 2 + 1 / mueff) / ((N + 2)^2 + 2 * mueff / 2);
    damps = 1 + 2 * max(0, sqrt((mueff - 1) / (N + 1)) - 1) + cs;
    '''

    # adaptation settings
    # cmu = mueff / features ** 2
    cc = (4 + mueff / features) / (features + 4 + 2 * mueff / features)
    cs = (mueff + 2) / (features + mueff + 5)
    # c1 = 2 / ((features + 1.3) ** 2 + mueff)
    c1 = 0.5
    cmu = 2 * (mueff - 2 + 1 / mueff) / ((features + 2) ** 2 + 2 * mueff / 2)
    damps = 1 + 2 * max(0.0, sqrt((mueff - 1.0) / (features + 1)) - 1) + cs

    print(f'cc: {cc}, cs: {cs}, c1: {c1}, cmu: {cmu}, damps: {damps}')

    plt.title('weights')
    plt.plot(weights)
    plt.show()

    mean = torch.zeros(features)
    b = torch.eye(features)
    d = torch.eye(features)
    c = torch.matmul(b.matmul(d), b.matmul(d).T)
    pc = torch.zeros(features)

    for counteval in range(8):
        # sample parameters
        s, z = cma_es.sample(samples, step_size, mean, b, d)

        # rank by fitness
        f = spike(s[:, 0], s[:, 1])
        g = [{'sample': s[i], 'z': z[i], 'fitness': f.item()} for i, f in enumerate(f)]
        g = sorted(g, key=lambda x: x['fitness'], reverse=True)
        g = g[0:mu]
        z = torch.stack([g['z'] for g in g])
        g = torch.stack([g['sample'] for g in g])

        plot_heatmap('sample ', counteval, mean, b, d, samples=s, g=g)

        # backup
        mean_prev = mean.clone()
        c_prev = c.clone()
        g_raw = g.clone()

        mean = (g * weights.unsqueeze(1)).sum(0)
        zmean = (z * weights.unsqueeze(1)).sum(0)

        # a mind-bending way to write an exponentially smoothed moving average for
        # the variance: zmean contains neither step size nor mean, which lets us
        # add together updates taken at different step sizes
        pc = (1 - cc) * pc + cc * b.matmul(d).matmul(zmean)
        cov_pc = pc.unsqueeze(1).matmul(pc.unsqueeze(1).t())

        # update covariance from the smoothed mean in z-space
        c = (1 - c1) * c + c1 * cov_pc

        # estimate weighted covariance in z-space (rank-mu update, disabled here)
        # t = b.matmul(d).matmul(z.t())
        # c = torch.matmul(t, weights.diag_embed())
        # c = c.matmul(t.t())
        # c = (1.0 - cmu) * c_prev + cmu * c

        d, b = torch.symeig(c, eigenvectors=True)
        d = d.sqrt().diag_embed()

        plot_heatmap('select', counteval, mean, b, d, g=g_raw)
        time.sleep(0.5)
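
# Why "rank one": pc is a single direction, so its outer product pc pc^T is a
# covariance matrix of rank 1. A quick standalone check (illustration only):
def _rank_one_demo():
    pc = torch.tensor([1.0, 2.0])
    cov_pc = pc.unsqueeze(1).matmul(pc.unsqueeze(1).t())  # 2x2 outer product
    e, _ = torch.symeig(cov_pc)
    print(e)  # one eigenvalue is ~0, the other is |pc|^2 = 5.0 -> rank 1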
def test_rank_mu_update():
    features = 2
    step_size = 1.0
    epochs = 1e3 * features ** 2

    # selection settings
    samples = 4 + floor(3 * log(features))
    mu = samples / 2
    weights = log(mu + 0.5) + torch.linspace(start=1, end=mu, steps=floor(mu)).log()
    weights = torch.flip(weights, dims=(0,)) / weights.sum()
    mu = floor(mu)
    mueff = weights.sum() ** 2 / (weights ** 2).sum()

    # adaptation settings
    cmu = mueff / features ** 2
    print(cmu)

    plt.title('weights')
    plt.plot(weights)
    plt.show()

    mean = torch.zeros(features)
    b = torch.eye(features)
    d = torch.eye(features)
    c = torch.matmul(b.matmul(d), b.matmul(d).T)

    for counteval in range(4):
        # sample parameters
        s, z = cma_es.sample(samples, step_size, mean, b, d)

        # rank by fitness
        f = spike(s[:, 0], s[:, 1])
        g = [{'sample': s[i], 'z': z[i], 'fitness': f.item()} for i, f in enumerate(f)]
        g = sorted(g, key=lambda x: x['fitness'], reverse=True)
        g = g[0:mu]
        z = torch.stack([g['z'] for g in g])
        g = torch.stack([g['sample'] for g in g])

        plot_heatmap('sample ', counteval, mean, b, d, samples=s, g=g)

        c_prev = c.clone()
        g_raw = g.clone()

        mean = (g * weights.unsqueeze(1)).sum(0)
        zmean = (z * weights.unsqueeze(1)).sum(0)

        # estimate weighted covariance in z-space
        t = b.matmul(d).matmul(z.t())
        c = torch.matmul(t, weights.diag_embed())
        c = c.matmul(t.t())
        c = (1.0 - cmu) * c_prev + cmu * c

        d, b = torch.symeig(c, eigenvectors=True)
        d = d.sqrt().diag_embed()

        plot_heatmap('select', counteval, mean, b, d, g=g_raw)
        time.sleep(0.5)
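
# The `spike` objective shared by the three tests above is defined elsewhere in
# the project. A hypothetical stand-in with a single sharp peak at the origin
# (assumption: any smooth unimodal bump is enough to eyeball the covariance
# adaptation in the heatmap plots):
def _spike_sketch(x, y):
    return torch.exp(-4.0 * (x ** 2 + y ** 2))  # fitness peaks at 1.0 at (0, 0)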
from torchvision.transforms import functional as TVF

if __name__ == '__main__':
    args = config.config()

    with torch.no_grad():
        v = UniImageViewer()
        datapack = ds.datasets[args.dataset]
        transporter_net = transporter.make(args).to(args.device)
        if args.load is not None:
            transporter_net.load(args.load)

        env = gym.make(datapack.env)

        while True:
            s = env.reset()
            done = False
            while not done:
                s, r, done, i = env.step(env.action_space.sample())
                s = datapack.prepro(s)
                s_t = datapack.transforms(s).unsqueeze(0).to(args.device)
                heatmap = transporter_net.keypoint(s_t)
                kp = KF.spacial_logsoftmax(heatmap)
                s = TVF.to_tensor(s).unsqueeze(0)
                s = plot_keypoints_on_image(kp[0], s[0])
                v.render(s)
                time.sleep(0.04)
    'scratch': Pos(atari_width * 2 + 80, 400),
}

xpos, ypos = None, None
imageid = 0

# the main application loop
while not glfw.window_should_close(window):
    glfw.poll_events()
    xpos, ypos = glfw.get_cursor_pos(window)
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)

    # take a step in the environment
    image_data, r, done, info = env.step(env.action_space.sample())
    glBindTexture(GL_TEXTURE_2D, texture)
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, image_data.shape[1], image_data.shape[0], 0,
                 GL_RGB, GL_UNSIGNED_BYTE, image_data)
    glTexParameter(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)
    glTexParameter(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)

    if done:
        env.reset()

    # render reference screen used for sampling
    glViewport(anchor['source'].x, anchor['source'].y, atari_width, atari_height)
    projection = pyrr.matrix44.create_orthogonal_projection_matrix(
        0, atari_width, 0, atari_height, -1000, 1000)
    glUniformMatrix4fv(proj_loc, 1, GL_FALSE, projection)
    glUniformMatrix4fv(model_loc, 1, GL_FALSE, atari_screen1_model)