Example #1
    def loss(self, model_output):
        losses = AttrDict()
        setattr(
            losses, 'mse',
            torch.nn.MSELoss()(model_output.action.squeeze(),
                               self.labels.to(self._hp.device)))
        # compute total loss
        losses.total_loss = torch.stack(list(losses.values())).sum()
        return losses
Example #2
    def loss(self, model_output):
        losses = AttrDict()
        for i_cl, cl in enumerate(self.tdist_classifiers):
            setattr(losses, 'tdist{}'.format(cl.tdist),
                    cl.loss(model_output[i_cl]))

        # compute total loss
        losses.total_loss = torch.stack(list(losses.values())).sum()
        return losses
Example #3
    def loss(self, model_output):
        losses = AttrDict()
        setattr(
            losses, 'cross_entropy',
            torch.nn.CrossEntropyLoss()(model_output.logits,
                                        self.labels.to(self._hp.device)))

        # compute total loss
        losses.total_loss = torch.stack(list(losses.values())).sum()
        return losses
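The three examples above share one pattern: each named loss term is stored in an AttrDict, and total_loss is the stacked sum of all terms collected so far. A minimal, self-contained sketch of that pattern, assuming AttrDict is a simple dict subclass with attribute access (the class used in the original repository may differ):

import torch

class AttrDict(dict):
    # dictionary whose keys can also be read and written as attributes
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__

def classification_loss(logits, labels):
    losses = AttrDict()
    losses.cross_entropy = torch.nn.CrossEntropyLoss()(logits, labels)
    # sum every named loss term into a single scalar
    losses.total_loss = torch.stack(list(losses.values())).sum()
    return losses

# usage: classification_loss(torch.randn(8, 5), torch.randint(0, 5, (8,))).total_loss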
Example #4
    def loss(self, model_output):
        if self._hp.low_dim:
            image_pairs = self.images[:, self._hp.state_size:]
        else:
            image_pairs = self.images[:, 3:]

        losses = AttrDict()

        if self._hp.min_q:
            # min-Q (CQL-style) regularization term
            total_min_q_loss = []
            self.min_q_lse = 0
            for i, q_fn in enumerate(self.qnetworks):
                random_q_values = self.network_out_2_qval(
                    self.compute_action_samples(self.get_sg_pair(self.images),
                                                q_fn,
                                                parallel=True,
                                                detach_grad=False))
                random_density = np.log(
                    0.5**self._hp.action_size)  # log uniform density
                random_q_values -= random_density
                min_q_loss = torch.logsumexp(random_q_values, dim=0) - np.log(
                    self._hp.est_max_samples)
                min_q_loss = min_q_loss.mean()
                self.min_q_lse += min_q_loss
                total_min_q_loss.append(min_q_loss - model_output[i].mean())
            total_min_q_loss = self.cql_sign * torch.stack(
                total_min_q_loss).mean()
            if self._hp.min_q_lagrange and hasattr(self, 'log_alpha'):
                min_q_weight = self.log_alpha.exp().squeeze()
                total_min_q_loss -= self._hp.min_q_eps
            else:
                min_q_weight = self._hp.min_q_weight
            losses.min_q_loss = min_q_weight * total_min_q_loss
            self.min_q_lagrange_loss = -1 * losses.min_q_loss

        losses.bellman_loss = self._hp.bellman_weight * self.get_td_error(
            image_pairs, model_output)

        losses.total_loss = torch.stack(list(losses.values())).sum()

        if 'min_q_loss' in losses:
            losses.min_q_loss /= min_q_weight  # Divide this back out so we can compare log likelihoods
        return losses
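Example #4 adds a conservative (CQL-style) regularizer on top of the Bellman loss: Q-values of uniformly sampled random actions are corrected by their log density, aggregated with a logsumexp estimate of the soft maximum, and the mean Q of the dataset actions is subtracted. A minimal sketch of just that term, with illustrative names (q_random, q_data) that are not from the original class:

import numpy as np
import torch

def conservative_term(q_random, q_data, action_size):
    # q_random: (num_samples, batch) Q-values of actions drawn uniformly from [-1, 1]^d
    # q_data:   (batch,) Q-values of the actions stored in the dataset
    log_uniform_density = np.log(0.5 ** action_size)
    # soft maximum over the sampled actions, corrected for the sampling density
    lse = torch.logsumexp(q_random - log_uniform_density, dim=0) - np.log(q_random.size(0))
    # push sampled-action Q-values down and dataset-action Q-values up
    return (lse - q_data).mean()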
Example #5
    def loss(self, model_output):
        if self._hp.low_dim:
            image_pairs = self.images[:, 2:]
        else:
            image_pairs = self.images[:, 3:]

        ## Get max_a Q (s_t+1) (Is a min since lower is better)
        qs = []
        for ns in range(100):
            actions = torch.FloatTensor(
                model_output.size(0), self._hp.action_size).uniform_(-1,
                                                                     1).cuda()
            targetq = self.target_qnetwork(image_pairs, actions)
            qs.append(targetq)
        qs = torch.stack(qs)
        qval = torch.sum(
            (1 + torch.arange(qs.shape[2])[None]).float().to(self._hp.device) *
            qs, 2)
        ## Select corresponding target Q distribution
        ids = qval.min(0)[1]
        newqs = []
        for k in range(self._hp.batch_size * 2):
            newqs.append(qs[ids[k], k])
        qs = torch.stack(newqs)

        ## Shift Q*(s_t+1) to get Q*(s_t)
        shifted = torch.zeros(qs.size()).to(self._hp.device)
        shifted[:, 1:] = qs[:, :-1]
        shifted[:, -1] += qs[:, -1]
        lb = self.labels.to(self._hp.device).unsqueeze(-1)
        isg = torch.zeros((self._hp.batch_size * 2, 10)).to(self._hp.device)
        isg[:, 0] = 1

        ## If next state is goal then target should be 0, else should be shifted
        losses = AttrDict()
        target = (lb * isg) + ((1 - lb) * shifted)

        ## KL between target and output
        log_q = self.out_softmax.clamp(1e-5, 1 - 1e-5).log()
        log_t = target.clamp(1e-5, 1 - 1e-5).log()
        losses.total_loss = (target * (log_t - log_q)).sum(1).mean()

        self.target_qnetwork.load_state_dict(self.qnetwork.state_dict())
        return losses
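Example #5 treats the Q-function as a categorical distribution over steps-to-goal bins: the best next-state distribution (lowest expected bin index) is shifted one bin to the right to account for the step just taken, and replaced by a one-hot on the first bin whenever the label says the goal is reached; the loss is the KL divergence from that target to the network's softmax output. A minimal sketch of the target construction and the KL, with illustrative names not taken from the original class:

import torch

def shifted_target(q_next, reached_goal):
    # q_next: (batch, n_bins) categorical distribution over steps-to-goal for s_{t+1}
    # reached_goal: (batch,) 0/1 labels
    shifted = torch.zeros_like(q_next)
    shifted[:, 1:] = q_next[:, :-1]      # one extra step shifts probability mass by one bin
    shifted[:, -1] += q_next[:, -1]      # the last bin absorbs the overflow
    one_hot_first = torch.zeros_like(q_next)
    one_hot_first[:, 0] = 1.0            # goal already reached: all mass on the first bin
    lb = reached_goal.unsqueeze(-1).float()
    return lb * one_hot_first + (1 - lb) * shifted

def categorical_kl(target, pred, eps=1e-5):
    log_t = target.clamp(eps, 1 - eps).log()
    log_q = pred.clamp(eps, 1 - eps).log()
    return (target * (log_t - log_q)).sum(1).mean()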
Example #6
    def loss(self, model_output):
        #         BCE = F.binary_cross_entropy(self.rec.view(-1, 3, 64, 64), self.images.view(-1, 3, 64, 64), size_average=False)
        #         BCE = F.mse_loss(self.rec.view(-1, 3, 64, 64), ((self.images.view(-1, 3, 64, 64) + 1 ) / 2.0), size_average=False)
        BCE = ((self.rec - ((self.images + 1) / 2.0))**2).mean()
        for i in range(10):
            rec = self.rec[i, 0].permute(1, 2,
                                         0).cpu().detach().numpy() * 255.0
            im = ((self.images + 1) / 2.0)[i, 0].permute(
                1, 2, 0).cpu().detach().numpy() * 255.0
            ex = np.concatenate([rec, im], 0)
            cv2.imwrite("ex" + str(i) + ".png", ex)

        #         print(BCE)
        KLD = -0.5 * torch.mean(1 + self.logvar - self.mu.pow(2) -
                                self.logvar.exp())
        #         print(KLD)
        losses = AttrDict()
        losses.total_loss = BCE + 0.00001 * KLD
        return losses
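Example #6 is a standard VAE objective: a mean-squared reconstruction error against images rescaled from [-1, 1] to [0, 1], plus the closed-form KL divergence between the Gaussian posterior N(mu, exp(logvar)) and the standard normal prior, down-weighted by a small factor (1e-5 above). A minimal sketch of the two terms without the debug image dumps:

import torch

def vae_loss(rec, images, mu, logvar, beta=1e-5):
    # reconstruction term: images come in [-1, 1], reconstructions in [0, 1]
    recon = ((rec - (images + 1) / 2.0) ** 2).mean()
    # KL(N(mu, exp(logvar)) || N(0, I)), averaged over batch and latent dimensions
    kld = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
    return recon + beta * kld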
Example #7
    def loss(self, model_output):
        if self._hp.low_dim:
            image_pairs = self.images[:, 2:]
        else:
            image_pairs = self.images[:, 3:]

        qs = []
        for ns in range(100):
            actions = torch.FloatTensor(
                model_output.size(0), self._hp.action_size).uniform_(-1,
                                                                     1).cuda()
            targetq = self.target_qnetwork(image_pairs, actions)
            qs.append(targetq)
        qs = torch.stack(qs)
        lb = self.labels.to(self._hp.device)

        losses = AttrDict()
        target = lb + self._hp.gamma * torch.max(qs, 0)[0].squeeze()
        losses.total_loss = F.mse_loss(target, model_output.squeeze())

        self.target_qnetwork.load_state_dict(self.qnetwork.state_dict())
        return losses
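Example #7 is a plain fitted-Q update: the max over actions in the TD target r + gamma * max_a Q_target(s', a) is approximated by the best of 100 uniformly sampled actions, and the online network's prediction is regressed onto that target with an MSE loss. A minimal sketch with illustrative names (the labels play the role of the reward, as in the example above):

import torch
import torch.nn.functional as F

def td_loss(q_pred, target_qnetwork, image_pairs, labels, action_size, gamma, n_samples=100):
    # approximate max_a Q_target(s', a) by sampling actions uniformly from [-1, 1]^d
    qs = []
    for _ in range(n_samples):
        actions = torch.rand(q_pred.size(0), action_size, device=q_pred.device) * 2 - 1
        qs.append(target_qnetwork(image_pairs, actions))
    qs = torch.stack(qs)                                  # (n_samples, batch, 1)
    target = labels + gamma * qs.max(dim=0)[0].squeeze()  # r + gamma * max_a Q_target(s', a)
    return F.mse_loss(target, q_pred.squeeze())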