def fit(self, *args, **kwargs):
    """
    Train the network on the given dataset.

    Runs ``self._n_epochs`` passes over the data. When ``self._batch_size``
    is positive, each pass iterates over what ``minibatch_generator``
    produces for ``args``; otherwise the whole dataset is used as a single
    batch. Unless ``self._quiet`` is set, the mean batch loss of every pass
    is written with ``tqdm.write``.

    Args:
        *args: numpy arrays. All but the last are passed positionally to the
            network; the last one is the target given to the loss.
        **kwargs: keyword arguments forwarded to the network call.

    """
    for t in trange(self._n_epochs, disable=self._quiet):
        # Rebuilt on every epoch so each pass iterates over fresh batches.
        if self._batch_size > 0:
            batches = minibatch_generator(self._batch_size, *args)
        else:
            batches = [args]

        loss_current = list()
        for batch in tqdm(batches, disable=self._quiet):
            # Convert the current minibatch only. The previous CUDA branch
            # converted ``args`` (the full dataset) on every step, silently
            # ignoring the minibatch.
            torch_args = [torch.from_numpy(x) for x in batch]
            if self._device is not None:
                torch_args = [x.cuda(self._device) for x in torch_args]

            x = torch_args[:-1]
            y = torch_args[-1]

            y_hat = self._network(*x, **kwargs)
            # The target is cast to the tensor type of the network output
            # before being handed to the loss.
            loss = self._loss(y_hat, y.type(y_hat.type()))
            loss_current.append(loss.item())

            self._optimizer.zero_grad()
            loss.backward()
            self._optimizer.step()

        if not self._quiet:
            tqdm.write('loss at epoch ' + str(t) + ' ' +
                       str(np.mean(loss_current)))
def fit(self, *args):
    """
    Train the network on the given dataset.

    Runs ``self._n_epochs`` passes over the data. When ``self._batch_size``
    is positive, each pass iterates over what ``minibatch_generator``
    produces for ``args``; otherwise the whole dataset is used as a single
    batch.

    Args:
        *args: either two numpy arrays ``(input, target)`` or three numpy
            arrays ``(input, second_input, target)``. In the three-array
            form the second array is cast to ``long`` before being passed
            to the network (presumably an index-like input such as discrete
            actions; confirm against the network definition).

    Raises:
        ValueError: if a batch has to be processed and the number of arrays
            is neither 2 nor 3.

    """
    for _ in trange(self._n_epochs, disable=self._quiet):
        # Rebuilt on every epoch: a generator created once outside this
        # loop would be exhausted after the first pass, leaving later
        # epochs with nothing to iterate over. The arrays are unpacked to
        # match the variadic calls used by the other training routines.
        if self._batch_size > 0:
            batches = minibatch_generator(self._batch_size, *args)
        else:
            batches = [args]

        for batch in tqdm(batches, disable=self._quiet):
            if len(args) == 3:
                x = [torch.from_numpy(batch[0]),
                     torch.from_numpy(batch[1]).long()]
                y = torch.from_numpy(batch[2])
            elif len(args) == 2:
                x = [torch.from_numpy(batch[0])]
                y = torch.from_numpy(batch[1])
            else:
                # Previously this fell through with x and y unbound.
                raise ValueError(
                    f'fit expects 2 or 3 arrays, got {len(args)}'
                )

            if self._device is not None:
                # Tensors are moved only after conversion. The old
                # two-array branch called .cuda() on the numpy array
                # itself, which raises AttributeError.
                x = [x_i.cuda(self._device) for x_i in x]
                y = y.cuda(self._device)

            y_hat = self._network(*x)
            loss = self._loss(y_hat, y)

            self._optimizer.zero_grad()
            loss.backward()
            self._optimizer.step()
def _fit_epoch(self, args, kwargs):
    """
    Run a single pass over the dataset and return the mean batch loss.

    Args:
        args: sequence holding the dataset. With a positive
            ``self._batch_size`` it is unpacked into
            ``minibatch_generator``; otherwise it is used as the only batch.
        kwargs: forwarded unchanged to ``self._fit_batch``.

    Returns:
        ``np.mean`` of the values returned by ``self._fit_batch`` for each
        batch.

    """
    use_minibatches = self._batch_size > 0
    batches = (
        minibatch_generator(self._batch_size, *args)
        if use_minibatches else [args]
    )

    # _fit_batch receives the batch together with the full args and kwargs.
    batch_losses = [
        self._fit_batch(chunk, args, kwargs) for chunk in batches
    ]

    return np.mean(batch_losses)
def _update_policy(self, obs, act, adv, old_log_p):
    """
    Optimise the policy with a clipped probability-ratio objective.

    For ``self._n_epochs_policy`` passes, iterates over the minibatches
    produced by ``minibatch_generator`` and takes one optimiser step per
    minibatch. The loss is the negative mean of the element-wise minimum
    between ``ratio * adv`` and ``clamp(ratio, 1 - eps, 1 + eps) * adv``,
    where ``ratio = exp(log_prob_new - log_prob_old)`` and ``eps`` is
    ``self._eps_ppo``.

    Args:
        obs: observations passed to ``self.policy.log_prob_t``.
        act: actions passed to ``self.policy.log_prob_t``.
        adv: advantages multiplying the probability ratio.
        old_log_p: log-probabilities subtracted from the current ones.

    """
    for _ in range(self._n_epochs_policy):
        minibatches = minibatch_generator(
            self._batch_size, obs, act, adv, old_log_p
        )
        for obs_mb, act_mb, adv_mb, old_log_p_mb in minibatches:
            self._optimizer.zero_grad()

            log_ratio = self.policy.log_prob_t(obs_mb, act_mb) - old_log_p_mb
            ratio = torch.exp(log_ratio)

            lower = 1 - self._eps_ppo
            upper = 1 + self._eps_ppo
            ratio_clipped = torch.clamp(ratio, lower, upper)

            # The element-wise minimum keeps the more pessimistic term.
            unclipped_term = ratio * adv_mb
            clipped_term = ratio_clipped * adv_mb
            loss = -torch.mean(torch.min(unclipped_term, clipped_term))

            loss.backward()
            self._optimizer.step()
def fit(self, *args, **kwargs):
    """
    Train the network on the given dataset.

    Runs ``self._n_epochs`` passes over the data. When ``self._batch_size``
    is positive, each pass iterates over what ``minibatch_generator``
    produces for ``args``; otherwise the whole dataset is used as a single
    batch. Unless ``self._quiet`` is set, the mean batch loss of every pass
    is written with ``tqdm.write``. If ``self._dropout`` is set, the network
    is put in train mode for the duration of the call and switched back to
    eval mode afterwards, even when training raises.

    Args:
        *args: numpy arrays. All but the last are passed positionally to the
            network; the last one is the target given to the loss.
        **kwargs: keyword arguments forwarded to the network call.

    """
    if self._dropout:
        self._network.train()

    try:
        for t in trange(self._n_epochs, disable=self._quiet):
            # Rebuilt on every epoch so each pass iterates over fresh batches.
            if self._batch_size > 0:
                batches = minibatch_generator(self._batch_size, *args)
            else:
                batches = [args]

            loss_current = list()
            for batch in tqdm(batches, disable=self._quiet):
                # Convert the current minibatch only. The previous CUDA
                # branch converted ``args`` (the full dataset) every step.
                torch_args = [torch.from_numpy(x) for x in batch]
                if self._use_cuda:
                    torch_args = [x.cuda() for x in torch_args]

                x = torch_args[:-1]
                y_hat = self._network(*x, **kwargs)

                # A network may return a tuple; the dtype of its first
                # element is then the one the target is cast to.
                if isinstance(y_hat, tuple):
                    output_type = y_hat[0].dtype
                else:
                    output_type = y_hat.dtype

                # Cast the existing tensor instead of rebuilding it with
                # torch.tensor(), which warns when given a tensor. The
                # target already sits on the right device.
                y = torch_args[-1].to(dtype=output_type)

                loss = self._loss(y_hat, y)
                loss_current.append(loss.item())

                self._optimizer.zero_grad()
                loss.backward()
                self._optimizer.step()

            if not self._quiet:
                tqdm.write('loss at epoch ' + str(t) + ' ' +
                           str(np.mean(loss_current)))
    finally:
        # Restore eval mode even if training was interrupted by an error.
        if self._dropout:
            self._network.eval()
def fit(self, *args):
    """
    Train the network on the given dataset.

    Runs ``self._n_epochs`` passes over the data. When ``self._batch_size``
    is positive, each pass iterates over what ``minibatch_generator``
    produces for ``args``; otherwise the whole dataset is used as a single
    batch.

    Args:
        *args: numpy arrays. All but the last are passed positionally to the
            network; the last one is the target given to the loss.

    """
    for _ in trange(self._n_epochs, disable=self._quiet):
        # Rebuilt on every epoch: a generator created once outside this
        # loop would be exhausted after the first pass, leaving later
        # epochs with nothing to iterate over. The arrays are unpacked to
        # match the variadic calls used by the other training routines.
        if self._batch_size > 0:
            batches = minibatch_generator(self._batch_size, *args)
        else:
            batches = [args]

        for batch in tqdm(batches, disable=self._quiet):
            # Convert the current minibatch only. The previous CUDA branch
            # converted ``args`` (the full dataset) on every step.
            torch_args = [torch.from_numpy(x) for x in batch]
            if self._device is not None:
                torch_args = [x.cuda(self._device) for x in torch_args]

            x = torch_args[:-1]
            y = torch_args[-1]

            y_hat = self._network(*x)
            loss = self._loss(y_hat, y)

            self._optimizer.zero_grad()
            loss.backward()
            self._optimizer.step()