Esempio n. 1
0
    def fit(self, *args, **kwargs):
        """
        Train the network for ``self._n_epochs`` epochs.

        Args:
            *args: NumPy arrays. All arrays but the last one are passed to
                the network as inputs; the last one is the target;
            **kwargs: keyword arguments forwarded to the network call.

        """
        for t in trange(self._n_epochs, disable=self._quiet):
            # Batches are rebuilt at the start of every epoch; a non-positive
            # batch size means a single batch made of the whole dataset.
            if self._batch_size > 0:
                batches = minibatch_generator(self._batch_size, *args)
            else:
                batches = [args]

            loss_current = list()
            for batch in tqdm(batches, disable=self._quiet):
                # Convert the current minibatch only. The previous code
                # converted the whole dataset (``args``) on the CUDA path,
                # which silently disabled minibatching on GPU.
                torch_args = [torch.from_numpy(x) for x in batch]
                if self._device is not None:
                    torch_args = [x.cuda(self._device) for x in torch_args]

                x = torch_args[:-1]
                y = torch_args[-1]

                y_hat = self._network(*x, **kwargs)
                # The target is cast to the tensor type of the prediction
                # before computing the loss.
                loss = self._loss(y_hat, y.type(y_hat.type()))
                loss_current.append(loss.item())

                self._optimizer.zero_grad()
                loss.backward()
                self._optimizer.step()

            if not self._quiet:
                tqdm.write('loss at epoch ' + str(t) + ' ' +
                           str(np.mean(loss_current)))
Esempio n. 2
0
    def fit(self, *args):
        """
        Train the network for ``self._n_epochs`` epochs.

        Args:
            *args: either three NumPy arrays (two network inputs, the second
                of which is cast to ``long``, followed by the target) or two
                NumPy arrays (one network input followed by the target).

        Raises:
            ValueError: if the number of arrays is neither 2 nor 3.

        """
        for _ in trange(self._n_epochs, disable=self._quiet):
            # Batches are rebuilt at every epoch. They used to be created
            # once before the epoch loop; presumably ``minibatch_generator``
            # is a one-shot generator, in which case only the first epoch
            # actually saw data.
            # NOTE(review): ``args`` is passed un-starred here; confirm this
            # matches the signature of ``minibatch_generator``.
            if self._batch_size > 0:
                batches = minibatch_generator(self._batch_size, args)
            else:
                batches = [args]

            for batch in tqdm(batches, disable=self._quiet):
                if len(args) == 3:
                    x = [torch.from_numpy(batch[0]),
                         torch.from_numpy(batch[1]).long()]
                    y = torch.from_numpy(batch[2])
                elif len(args) == 2:
                    x = [torch.from_numpy(batch[0])]
                    y = torch.from_numpy(batch[1])
                else:
                    # Previously this fell through to unbound ``x``/``y``.
                    raise ValueError(
                        'fit expects 2 or 3 arrays, got ' + str(len(args)))

                # Move tensors to the device only after the conversion from
                # NumPy: ``.cuda`` is a tensor method, not an ndarray one.
                if self._device is not None:
                    x = [v.cuda(self._device) for v in x]
                    y = y.cuda(self._device)

                y_hat = self._network(*x)
                loss = self._loss(y_hat, y)
                self._optimizer.zero_grad()
                loss.backward()
                self._optimizer.step()
Esempio n. 3
0
    def _fit_epoch(self, args, kwargs):
        if self._batch_size > 0:
            batches = minibatch_generator(self._batch_size, *args)
        else:
            batches = [args]

        loss_current = list()
        for batch in batches:
            loss_current.append(self._fit_batch(batch, args, kwargs))

        return np.mean(loss_current)
Esempio n. 4
0
 def _update_policy(self, obs, act, adv, old_log_p):
     """
     Optimize the policy on the clipped probability-ratio objective.

     For ``self._n_epochs_policy`` passes, the data are split in minibatches
     and one optimizer step is taken per minibatch.

     Args:
         obs: observations;
         act: actions;
         adv: advantages weighting the probability ratio;
         old_log_p: log-probabilities subtracted from the current policy
             log-probabilities to build the ratio.

     """
     for _ in range(self._n_epochs_policy):
         minibatches = minibatch_generator(
             self._batch_size, obs, act, adv, old_log_p
         )
         for obs_b, act_b, adv_b, log_p_b in minibatches:
             self._optimizer.zero_grad()

             # Ratio between current and old action probabilities.
             log_p_new = self.policy.log_prob_t(obs_b, act_b)
             ratio = torch.exp(log_p_new - log_p_b)
             ratio_clipped = torch.clamp(
                 ratio, 1 - self._eps_ppo, 1 + self._eps_ppo
             )

             # Element-wise minimum of the plain and the clipped terms,
             # negated so that the optimizer maximizes it.
             surrogate = torch.min(ratio * adv_b, ratio_clipped * adv_b)
             loss = -torch.mean(surrogate)

             loss.backward()
             self._optimizer.step()
Esempio n. 5
0
    def fit(self, *args, **kwargs):
        """
        Train the network for ``self._n_epochs`` epochs.

        When ``self._dropout`` is set, the network is switched to training
        mode before fitting and back to evaluation mode afterwards.

        Args:
            *args: NumPy arrays. All arrays but the last one are passed to
                the network as inputs; the last one is the target;
            **kwargs: keyword arguments forwarded to the network call.

        """
        if self._dropout:
            self._network.train()

        for t in trange(self._n_epochs, disable=self._quiet):
            # Batches are rebuilt at the start of every epoch; a non-positive
            # batch size means a single batch made of the whole dataset.
            if self._batch_size > 0:
                batches = minibatch_generator(self._batch_size, *args)
            else:
                batches = [args]

            loss_current = list()
            for batch in tqdm(batches, disable=self._quiet):
                # Convert the current minibatch only. The previous code
                # converted the whole dataset (``args``) on the CUDA path,
                # which silently disabled minibatching on GPU.
                torch_args = [torch.from_numpy(x) for x in batch]
                if self._use_cuda:
                    torch_args = [x.cuda() for x in torch_args]

                x = torch_args[:-1]
                y_hat = self._network(*x, **kwargs)

                # The network may return a tuple; the dtype of its first
                # element is then used for the target.
                if isinstance(y_hat, tuple):
                    output_type = y_hat[0].dtype
                else:
                    output_type = y_hat.dtype

                # ``.to`` casts an existing tensor (keeping its device)
                # without the copy-construct warning that
                # ``torch.tensor(tensor)`` triggers.
                y = torch_args[-1].to(dtype=output_type)

                loss = self._loss(y_hat, y)
                loss_current.append(loss.item())

                self._optimizer.zero_grad()
                loss.backward()
                self._optimizer.step()

            if not self._quiet:
                tqdm.write('loss at epoch ' + str(t) + ' ' +
                           str(np.mean(loss_current)))

        if self._dropout:
            self._network.eval()
Esempio n. 6
0
    def fit(self, *args):
        """
        Train the network for ``self._n_epochs`` epochs.

        Args:
            *args: NumPy arrays. All arrays but the last one are passed to
                the network as inputs; the last one is the target.

        """
        for _ in trange(self._n_epochs, disable=self._quiet):
            # Batches are rebuilt at every epoch. They used to be created
            # once before the epoch loop; presumably ``minibatch_generator``
            # is a one-shot generator, in which case only the first epoch
            # actually saw data.
            # NOTE(review): ``args`` is passed un-starred here; confirm this
            # matches the signature of ``minibatch_generator``.
            if self._batch_size > 0:
                batches = minibatch_generator(self._batch_size, args)
            else:
                batches = [args]

            for batch in tqdm(batches, disable=self._quiet):
                # Convert the current minibatch only. The previous code
                # converted the whole dataset (``args``) when a device was
                # set, which silently disabled minibatching on GPU.
                torch_args = [torch.from_numpy(x) for x in batch]
                if self._device is not None:
                    torch_args = [x.cuda(self._device) for x in torch_args]

                x = torch_args[:-1]
                y = torch_args[-1]

                y_hat = self._network(*x)
                loss = self._loss(y_hat, y)
                self._optimizer.zero_grad()
                loss.backward()
                self._optimizer.step()