ax.text(0.4, 1.04, 'Recon', transform=ax.transAxes, family='serif', size=6)

# Plot the gradient of the real frame
ax = plt.subplot2grid((rows, cols), (i, 3), frameon=False)
frame = dataset[traj_ind][start_ind + i][1]  # [2,84,84]
x = Variable(torch.from_numpy(np.array([frame])).float(), requires_grad=True).cuda()
# dist = policy.action_dist(x)
dist = policy.action_logdist(x)
grad = torch.autograd.grad(torch.sum(dist[:, 3]), x)[0]
grad = grad.data.cpu().numpy()[0]  # for the first one in the batch -> [2,84,84]
grad = np.abs(grad)
state1 = np.concatenate([grad[0], grad[1]], axis=1)
# ax.imshow(state1, cmap='gray', norm=NoNorm())
ax.imshow(state1, cmap='gray')
ax.set_xticks([])
ax.set_yticks([])
ax.text(0.4, 1.04, 'Grad of Real', transform=ax.transAxes, family='serif', size=6)
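# ---------------------------------------------------------------------------
# For reference: the gradient-saliency step above as a standalone routine.
# A minimal sketch, assuming `policy.action_logdist` returns per-action
# log-probabilities as in this file; written against the modern tensor API
# rather than the deprecated `Variable` wrapper used elsewhere here, and the
# function name `saliency_map` is illustrative, not from the source.
def saliency_map(policy, frame, action):
    """|d log pi(action|frame) / d frame| as a [2, 84, 84] saliency map."""
    x = torch.from_numpy(frame[None]).float()  # add batch dim -> [1, 2, 84, 84]
    x.requires_grad_(True)
    log_dist = policy.action_logdist(x)        # [1, n_actions]
    grad = torch.autograd.grad(log_dist[:, action].sum(), x)[0]
    return grad[0].abs().numpy()               # saliency for the single batch item
# ---------------------------------------------------------------------------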
losses = []
for m in range(mc_samples):
    eps = Variable(torch.Tensor(1, 2, 84, 84).normal_(0, 1)).cuda()
    noisy_frame = frame + eps * (torch.sqrt(torch.exp(logvar)))
    noisy_frame = torch.clamp(noisy_frame, min=0., max=max_val)
    # noisy_frame = noisy_frame - Variable(torch.min(noisy_frame).data)
    # noisy_frame = noisy_frame / Variable(torch.max(noisy_frame).data)
    # noisy_frame = noisy_frame * max_val
    dist_noise = policy.action_dist(noisy_frame)
    log_dist_noise = policy.action_logdist(noisy_frame)
    log_dist_true = policy.action_logdist(frame)
    action_dist_kl = torch.sum((log_dist_true - log_dist_noise) * torch.exp(log_dist_true), dim=1)  # [B]
    action_dist_kl = torch.mean(action_dist_kl)  # * 1000.
    # std of 2 is the prior, which is 4 for var, which is log(4) for logvar
    # loss = action_dist_kl + (logvar - torch.log(4))**2
    logvar_dif = torch.mean((logvar - .6)**2) * .0001
    loss = action_dist_kl + logvar_dif
    losses.append(loss)
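# ---------------------------------------------------------------------------
# For reference: the Monte-Carlo loop above, in context. It learns a per-pixel
# noise log-variance: the KL(true || noisy) term shrinks the noise wherever
# perturbing a pixel changes the action distribution, so high-variance pixels
# are the unimportant ones, while the small (logvar - .6)^2 penalty keeps the
# variance from drifting. A minimal sketch of the enclosing optimization,
# assuming `frame` is a [1, 2, 84, 84] tensor in [0, max_val]; the step count,
# learning rate, and function name are illustrative, not from the source.
def optimize_logvar(policy, frame, max_val, mc_samples, n_steps=100):
    logvar = torch.zeros(1, 2, 84, 84, requires_grad=True)
    optimizer = torch.optim.Adam([logvar], lr=1e-2)
    log_dist_true = policy.action_logdist(frame).detach()
    for step in range(n_steps):
        losses = []
        for m in range(mc_samples):
            eps = torch.randn(1, 2, 84, 84)
            noisy = (frame + eps * torch.exp(.5 * logvar)).clamp(0., max_val)
            log_dist_noise = policy.action_logdist(noisy)
            # KL(true || noisy) for categorical distributions
            kl = (torch.exp(log_dist_true)
                  * (log_dist_true - log_dist_noise)).sum(dim=1).mean()
            losses.append(kl + .0001 * torch.mean((logvar - .6) ** 2))
        loss = torch.stack(losses).mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return logvar
# ---------------------------------------------------------------------------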
# losses = []
# for m in range(mc_samples):
#     eps = Variable(torch.Tensor(1,2,84,84).normal_(0,1)).cuda()
#     noisy_frame = frame + eps*(torch.sqrt(torch.exp(mask)))
#     noisy_frame = torch.clamp(noisy_frame, min=0., max=max_val)
#     print (fra)
#     noisy_frame = noisy_frame - Variable(torch.min(noisy_frame).data)
#     noisy_frame = noisy_frame / Variable(torch.max(noisy_frame).data)
#     noisy_frame = noisy_frame * max_val

# dist_mask = policy.action_dist(masked_frame)
log_dist_mask = policy.action_logdist(masked_frame)
log_dist_true = policy.action_logdist(frame)
action_dist_kl = torch.sum((log_dist_true - log_dist_mask) * torch.exp(log_dist_true), dim=1)  # [B]
action_dist_kl = torch.mean(action_dist_kl)  # * 1000
# std of 2 is the prior, which is 4 for var, which is log(4) for mask
# loss = action_dist_kl + (mask - torch.log(4))**2
# mask_dif = torch.mean((mask - .6)**2) * .0001
mask_sum = torch.sum(mask_sig) * .0001
loss = action_dist_kl + mask_sum
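# ---------------------------------------------------------------------------
# For reference: how `mask_sig` and `masked_frame` might be produced upstream.
# The mask variant replaces additive noise with a learned per-pixel gate in
# (0, 1); the torch.sum(mask_sig) term is an L1 sparsity penalty, so the gate
# stays closed except on pixels the policy actually relies on. A minimal
# sketch: the gating-toward-zero blend and the function name are assumptions,
# not confirmed by the source.
def mask_loss(policy, frame):
    mask = torch.zeros(1, 2, 84, 84, requires_grad=True)  # raw mask parameters
    mask_sig = torch.sigmoid(mask)       # per-pixel gate in (0, 1)
    masked_frame = frame * mask_sig      # keep pixels where the gate is open
    log_dist_mask = policy.action_logdist(masked_frame)
    log_dist_true = policy.action_logdist(frame)
    kl = (torch.exp(log_dist_true)
          * (log_dist_true - log_dist_mask)).sum(dim=1).mean()
    return kl + .0001 * torch.sum(mask_sig)
# ---------------------------------------------------------------------------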