# Monte Carlo estimate of how far Gaussian input noise, with per-element
# variance exp(logvar), moves the policy's action distribution for one frame.
# NOTE(review): statement grouping under the loop was reconstructed from a
# whitespace-collapsed source -- confirm the loop extent against the original.
frame = Variable(frame1).cuda()
losses = []
for m in range(mc_samples):
    # Standard-normal noise, scaled below by the std sqrt(exp(logvar)).
    unit_noise = torch.Tensor(1, 2, 84, 84).normal_(0, 1)
    eps = Variable(unit_noise).cuda()
    noise_std = torch.sqrt(torch.exp(logvar))

    # Perturb the frame and keep every value inside [0, max_val].
    noisy_frame = torch.clamp(frame + eps * noise_std, min=0., max=max_val)

    dist_noise = policy.action_dist(noisy_frame)
    log_dist_noise = policy.action_logdist(noisy_frame)
    log_dist_true = policy.action_logdist(frame)

    # sum over dim 1 of exp(log_true) * (log_true - log_noise), i.e. the
    # KL(true || noisy) form -- assumes action_logdist returns
    # log-probabilities over actions along dim 1 (TODO confirm).
    # The per-sample sum is [B]; it is then averaged to a scalar.
    log_ratio = log_dist_true - log_dist_noise
    true_probs = torch.exp(log_dist_true)
    action_dist_kl = torch.mean(torch.sum(log_ratio * true_probs, dim=1))

    # Weak quadratic penalty (weight 1e-4) pulling logvar toward 0.6.
    # NOTE(review): the earlier comment here described the prior as
    # std 2 -> var 4 -> logvar log(4). Natural log(4) is ~1.386, while
    # 0.6 is ~log10(4); since the variance is recovered with exp(logvar),
    # confirm that .6 is the intended target.
    logvar_dif = torch.mean((logvar - .6)**2) * .0001
    loss = action_dist_kl + logvar_dif
# Monte Carlo estimate of how far Gaussian input noise, with per-element
# variance exp(logvar), moves the policy's action distribution for one frame.
# NOTE(review): statement grouping under the loop was reconstructed from a
# whitespace-collapsed source -- confirm the loop extent against the original.
frame = Variable(frame1).cuda()
losses = []
for m in range(mc_samples):
    # Standard-normal noise, scaled below by the std sqrt(exp(logvar)).
    unit_noise = torch.Tensor(1, 2, 84, 84).normal_(0, 1)
    eps = Variable(unit_noise).cuda()
    noise_std = torch.sqrt(torch.exp(logvar))

    # Perturb the frame and keep every value inside [0, max_val].
    noisy_frame = torch.clamp(frame + eps * noise_std, min=0., max=max_val)

    dist_noise = policy.action_dist(noisy_frame)
    log_dist_noise = policy.action_logdist(noisy_frame)
    log_dist_true = policy.action_logdist(frame)

    # sum over dim 1 of exp(log_true) * (log_true - log_noise), i.e. the
    # KL(true || noisy) form -- assumes action_logdist returns
    # log-probabilities over actions along dim 1 (TODO confirm).
    # The per-sample sum is [B]; it is then averaged to a scalar.
    log_ratio = log_dist_true - log_dist_noise
    true_probs = torch.exp(log_dist_true)
    action_dist_kl = torch.mean(torch.sum(log_ratio * true_probs, dim=1))

    # Weak quadratic penalty (weight 1e-4) pulling logvar toward 0.6.
    # NOTE(review): the earlier comment here described the prior as
    # std 2 -> var 4 -> logvar log(4). Natural log(4) is ~1.386, while
    # 0.6 is ~log10(4); since the variance is recovered with exp(logvar),
    # confirm that .6 is the intended target.
    logvar_dif = torch.mean((logvar - .6)**2) * .0001
    loss = action_dist_kl + logvar_dif
dataset[traj_ind][start_ind + i][1][1] ], axis=1) ax.imshow(state1, cmap='gray') ax.set_xticks([]) ax.set_yticks([]) for a in range(4): #Plot grads ax = plt.subplot2grid((rows, cols), (i, 1 + a), frameon=False) x = Variable(torch.from_numpy( np.array([dataset[traj_ind][start_ind + i][1]])).float(), requires_grad=True).cuda() dist = policy.action_dist(x) grad = torch.autograd.grad(torch.sum(dist[:, a]), x)[0] grad = grad.data.cpu().numpy()[ 0] #for the first one in teh batch -> [2,84,84] grad = np.abs(grad) # print (np.max(grad)) # print (np.min(grad)) # print (np.mean(grad)) # fad state1 = np.concatenate([grad[0], grad[1]], axis=1) # ax.imshow(state1, cmap='gray', norm=NoNorm()) ax.imshow(state1, cmap='gray') ax.set_xticks([]) ax.set_yticks([])