Example #1
            ax.text(0.4,
                    1.04,
                    'Recon',
                    transform=ax.transAxes,
                    family='serif',
                    size=6)

            # Plot the gradient of the action log-probability w.r.t. the real frame
            ax = plt.subplot2grid((rows, cols), (i, 3), frameon=False)

            frame = dataset[traj_ind][start_ind + i][1]  #[2,84,84]
            x = Variable(torch.from_numpy(np.array([frame])).float(),
                         requires_grad=True).cuda()

            # dist = policy.action_dist(x)
            dist = policy.action_logdist(x)
            grad = torch.autograd.grad(torch.sum(dist[:, 3]), x)[0]
            grad = grad.data.cpu().numpy()[0]  # first element of the batch -> [2,84,84]
            grad = np.abs(grad)

            state1 = np.concatenate([grad[0], grad[1]], axis=1)
            # ax.imshow(state1, cmap='gray', norm=NoNorm())
            ax.imshow(state1, cmap='gray')
            ax.set_xticks([])
            ax.set_yticks([])

            ax.text(0.4,
                    1.04,
                    'Grad of Real',
                    transform=ax.transAxes,
                    family='serif',
                    size=6)
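
The gradient plot above can be factored into a small helper. A minimal sketch under the snippet's own assumptions: `policy.action_logdist` maps a [B, 2, 84, 84] input to [B, num_actions] log-probabilities, action index 3 is the one being visualized, and a CUDA device is available.

    import numpy as np
    import torch
    from torch.autograd import Variable

    def action_grad_saliency(policy, frame, action=3):
        # frame: numpy array of shape [2, 84, 84] (two stacked grayscale frames)
        x = Variable(torch.from_numpy(np.array([frame])).float(),
                     requires_grad=True).cuda()
        log_dist = policy.action_logdist(x)
        # Gradient of the chosen action's log-probability w.r.t. the input pixels
        grad = torch.autograd.grad(torch.sum(log_dist[:, action]), x)[0]
        grad = np.abs(grad.data.cpu().numpy()[0])  # [2, 84, 84]
        # Place the two frames side by side, matching the plotting code above
        return np.concatenate([grad[0], grad[1]], axis=1)

Taking the absolute value discards the gradient's sign, so the resulting map shows where the input influences the action's log-probability, not in which direction.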
Example #2
    losses = []
    for m in range(mc_samples):

        eps = Variable(torch.Tensor(1, 2, 84, 84).normal_(0, 1)).cuda()
        noisy_frame = frame + eps * (torch.sqrt(torch.exp(logvar)))

        noisy_frame = torch.clamp(noisy_frame, min=0., max=max_val)

        # noisy_frame = noisy_frame - Variable(torch.min(noisy_frame).data)
        # noisy_frame = noisy_frame / Variable(torch.max(noisy_frame).data)
        # noisy_frame = noisy_frame * max_val

        dist_noise = policy.action_dist(noisy_frame)  # unused; only the log-distributions below are needed

        log_dist_noise = policy.action_logdist(noisy_frame)
        log_dist_true = policy.action_logdist(frame)

        action_dist_kl = torch.sum(
            (log_dist_true - log_dist_noise) * torch.exp(log_dist_true),
            dim=1)  #[B]

        action_dist_kl = torch.mean(action_dist_kl)  # * 1000.

        # A prior std of 2 gives var = 4, i.e. logvar = log(4):
        # loss = action_dist_kl + (logvar - torch.log(4))**2
        # The active penalty instead pulls logvar toward .6, lightly weighted:
        logvar_dif = torch.mean((logvar - .6)**2) * .0001

        loss = action_dist_kl + logvar_dif

        losses.append(loss)
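
The loop above only accumulates the per-sample losses. A hedged sketch of the surrounding optimization, assuming `logvar` is the single free parameter and is fit with Adam (the optimizer, learning rate, and step count are assumptions, not taken from the snippet):

    import torch
    from torch.autograd import Variable

    def fit_logvar(policy, frame, max_val, mc_samples=10, n_steps=100):
        # frame: Variable of shape [1, 2, 84, 84], already on the GPU
        logvar = Variable(torch.zeros(1, 2, 84, 84).cuda(), requires_grad=True)
        optimizer = torch.optim.Adam([logvar], lr=1e-2)
        # The clean-frame distribution is a fixed target, so detach it
        log_dist_true = policy.action_logdist(frame).detach()
        for step in range(n_steps):
            losses = []
            for m in range(mc_samples):
                eps = Variable(torch.Tensor(1, 2, 84, 84).normal_(0, 1)).cuda()
                noisy_frame = torch.clamp(
                    frame + eps * torch.sqrt(torch.exp(logvar)),
                    min=0., max=max_val)
                log_dist_noise = policy.action_logdist(noisy_frame)
                # KL(true || noisy) over the action distribution, as above
                kl = torch.mean(torch.sum(
                    (log_dist_true - log_dist_noise) * torch.exp(log_dist_true),
                    dim=1))
                losses.append(kl + torch.mean((logvar - .6)**2) * .0001)
            loss = torch.mean(torch.stack(losses))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        return logvar

The KL term alone would drive the noise to zero wherever pixels matter to the policy, so the lightly weighted pull toward .6 presumably keeps the noise level high in regions the policy ignores, which is what makes the learned logvar readable as a saliency map.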
Example #3
    # The noise-based variant from Example #2 is disabled here; this version
    # perturbs the frame with a learned mask instead.
    log_dist_mask = policy.action_logdist(masked_frame)
    log_dist_true = policy.action_logdist(frame)

    action_dist_kl = torch.sum(
        (log_dist_true - log_dist_mask) * torch.exp(log_dist_true),
        dim=1)  #[B]

    action_dist_kl = torch.mean(action_dist_kl)  # * 1000

    # Leftover from the noise variant (a prior std of 2 would mean logvar = log(4)):
    # mask_dif = torch.mean((mask - .6)**2) * .0001

    # L1-style sparsity penalty on the sigmoid mask, lightly weighted
    mask_sum = torch.sum(mask_sig) * .0001

    loss = action_dist_kl + mask_sum
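
The snippet uses `mask_sig` and `masked_frame` without defining them. One plausible construction, stated as an assumption rather than the author's actual code: an unconstrained mask parameter is squashed through a sigmoid and applied multiplicatively, so `mask_sum` penalizes how much of the frame is kept.

    import torch
    import torch.nn.functional as F
    from torch.autograd import Variable

    # Assumed setup, not taken from the snippet
    mask = Variable(torch.zeros(1, 2, 84, 84).cuda(), requires_grad=True)
    mask_sig = F.sigmoid(mask)               # per-pixel keep-weight in (0, 1)
    masked_frame = frame * mask_sig          # keep only the selected pixels
    mask_sum = torch.sum(mask_sig) * .0001   # sparsity penalty, as in the snippet

Under this reading, minimizing `loss` searches for the smallest set of pixels whose retention preserves the policy's action distribution.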