def update_targets(
    self, copy_percentage: float = 0.005, *, metric_writer: Optional[Writer] = None
) -> None:
    r"""
    Polyak-average the target networks towards the main networks.

    Target networks are updated according to:

    \theta_{\text{targ}} \leftarrow \rho \theta_{\text{targ}} + (1 - \rho) \theta

    where \rho is the polyak coefficient (always between 0 and 1, usually close
    to 1), so ``copy_percentage`` corresponds to 1 - \rho.

    @param copy_percentage: fraction of the source parameters blended into the
    targets per update, i.e. 1 - \rho.
    @param metric_writer: optional writer used to log the sync event.
    @return: None
    """
    if metric_writer:
        metric_writer.blip("Target Models Synced", self.update_i)

    update_target(
        target_model=self.critic_1_target,
        source_model=self.critic_1,
        copy_percentage=copy_percentage,
    )
    update_target(
        target_model=self.critic_2_target,
        source_model=self.critic_2,
        copy_percentage=copy_percentage,
    )
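# The methods in this section call an ``update_target`` helper imported from
# elsewhere in the repository. A minimal sketch of what such a helper could look
# like is given here, assuming it performs the in-place polyak interpolation
# described in the docstring above; the keyword signature matches the call
# sites, but the body is illustrative, not the repository's actual
# implementation.
import torch
from torch.nn import Module


def update_target_sketch(
    *, target_model: Module, source_model: Module, copy_percentage: float
) -> None:
    """Blend ``copy_percentage`` of the source parameters into the target in-place."""
    with torch.no_grad():
        for target_param, source_param in zip(
            target_model.parameters(), source_model.parameters()
        ):
            # theta_targ <- (1 - copy_percentage) * theta_targ + copy_percentage * theta
            target_param.mul_(1.0 - copy_percentage).add_(
                source_param, alpha=copy_percentage
            )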
def _update(self, *, metric_writer: Writer = MockWriter()) -> float:
    """
    Sample a batch, take a gradient step on the value model, and periodically
    sync the target model when double DQN is enabled.

    @param metric_writer: writer used to log td_error, loss, learning rates and
    target-sync events.
    @return: the latest scalar loss, or math.inf if no update was performed.
    """
    loss_ = math.inf

    if self.update_i > self._initial_observation_period:
        if is_zero_or_mod_zero(self._learning_frequency, self.update_i):
            if len(self._memory_buffer) > self._batch_size:
                transitions = self._memory_buffer.sample(self._batch_size)

                td_error, Q_expected, Q_state = self._td_error(transitions)
                td_error = td_error.detach().squeeze(-1).cpu().numpy()

                if self._use_per:  # prioritised experience replay
                    self._memory_buffer.update_last_batch(td_error)

                loss = self._loss_function(Q_state, Q_expected)

                self._optimiser.zero_grad()
                loss.backward()
                self.post_process_gradients(self.value_model.parameters())
                self._optimiser.step()

                loss_ = to_scalar(loss)
                if metric_writer:
                    metric_writer.scalar("td_error", td_error.mean(), self.update_i)
                    metric_writer.scalar("loss", loss_, self.update_i)

                if self._scheduler:
                    self._scheduler.step()
                    if metric_writer:
                        for i, param_group in enumerate(self._optimiser.param_groups):
                            metric_writer.scalar(
                                f"lr{i}", param_group["lr"], self.update_i
                            )
            else:
                logging.info(
                    "Batch size is larger than current memory size, skipping update"
                )

        if self._double_dqn:
            if is_zero_or_mod_zero(self._sync_target_model_frequency, self.update_i):
                update_target(
                    target_model=self._target_value_model,
                    source_model=self.value_model,
                    copy_percentage=self._copy_percentage,
                )
                if metric_writer:
                    metric_writer.blip("Target Model Synced", self.update_i)

    return loss_
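# ``is_zero_or_mod_zero`` gates how often the learning and target-sync branches
# above run. A plausible reading, given the call sites, is "the interval is
# unset, or the step counter is a multiple of the interval"; this sketch encodes
# that assumption and is not necessarily the repository's exact implementation.
def is_zero_or_mod_zero_sketch(interval: int, step: int) -> bool:
    """Return True when ``interval`` is falsy or ``step`` is a multiple of it."""
    return not interval or step % interval == 0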
def _update_targets(
    self, copy_percentage: float, *, metric_writer: Optional[Writer] = None
) -> None:
    """
    Polyak-average the target actor-critic towards the main actor-critic.

    @param copy_percentage: fraction of the source parameters blended into the
    target per update.
    @param metric_writer: optional writer used to log the sync event.
    @return: None
    """
    if metric_writer:
        metric_writer.blip("Target Model Synced", self.update_i)

    update_target(
        target_model=self._target_actor_critic,
        source_model=self.actor_critic,
        copy_percentage=copy_percentage,
    )
def update_targets(
    self, update_percentage: float, *, metric_writer: Optional[Writer] = None
) -> None:
    """
    Polyak-average the target actor and critic towards their main networks.

    @param update_percentage: fraction of the source parameters blended into the
    targets per update.
    @param metric_writer: optional writer used to log the sync event.
    @return: None
    """
    with torch.no_grad():  # target updates are bookkeeping, not part of the graph
        if metric_writer:
            metric_writer.blip("Target Model Synced", self.update_i)

        update_target(
            target_model=self._target_critic,
            source_model=self._critic,
            copy_percentage=update_percentage,
        )
        update_target(
            target_model=self._target_actor,
            source_model=self._actor,
            copy_percentage=update_percentage,
        )
def train_siamese(
    model: Module,
    optimiser: Optimizer,
    criterion: callable,
    *,
    writer: Writer = MockWriter(),
    train_number_epochs: int,
    data_dir: Path,
    train_batch_size: int,
    model_name: str,
    save_path: Path,
    save_best: bool = False,
    img_size: Tuple[int, int],
    validation_interval: int = 1,
):
    """
    Train a siamese network on image pairs, validating every
    ``validation_interval`` batches and optionally checkpointing on improvement.

    :param model: siamese network to train.
    :type model: Module
    :param optimiser: optimiser stepping the model parameters.
    :type optimiser: Optimizer
    :param criterion: loss function applied to (distances, is_diff labels).
    :type criterion: callable
    :param writer: metric writer for train/validation scalars and blips.
    :type writer: Writer
    :param train_number_epochs: number of epochs to train for.
    :type train_number_epochs: int
    :param data_dir: root directory of the pair dataset.
    :type data_dir: Path
    :param train_batch_size: batch size for both dataloaders.
    :type train_batch_size: int
    :param model_name: name used when saving model parameters.
    :type model_name: str
    :param save_path: directory model parameters are saved to.
    :type save_path: Path
    :param save_best: save parameters whenever validation loss improves.
    :type save_best: bool
    :param img_size: (height, width) that images are resized to.
    :type img_size: Tuple[int, int]
    :param validation_interval: validate every this many training batches.
    :type validation_interval: int
    :return: the trained model.
    :rtype: Module
    """
    transform = transforms.Compose(
        [transforms.Grayscale(), transforms.Resize(img_size), transforms.ToTensor()]
    )
    train_dataloader = DataLoader(
        PairDataset(data_path=data_dir, transform=transform, split=Split.Training),
        shuffle=True,
        num_workers=4,
        batch_size=train_batch_size,
    )
    valid_dataloader = DataLoader(
        PairDataset(data_path=data_dir, transform=transform, split=Split.Validation),
        shuffle=True,
        num_workers=4,
        batch_size=train_batch_size,
    )

    best = math.inf
    E = tqdm(range(train_number_epochs))
    batch_counter = count()

    for epoch in E:
        for tss in train_dataloader:
            batch_i = next(batch_counter)

            with TorchTrainSession(model):
                o = [t.to(global_torch_device()) for t in tss]
                optimiser.zero_grad()
                loss_contrastive = criterion(model(*o[:2]), o[2].to(dtype=torch.float))
                loss_contrastive.backward()
                optimiser.step()
                train_loss = loss_contrastive.cpu().item()
                writer.scalar("train_loss", train_loss, batch_i)

            # note: do not call next(batch_counter) again here, that would skip
            # a batch index; reuse batch_i for the validation cadence.
            if batch_i % validation_interval == 0:
                with TorchEvalSession(model):
                    for tsv in valid_dataloader:
                        ov = [t.to(global_torch_device()) for t in tsv]
                        v_o, fact = model(*ov[:2]), ov[2].to(dtype=torch.float)
                        valid_loss = criterion(v_o, fact).cpu().item()
                        valid_accuracy = accuracy(distances=v_o, is_diff=fact).cpu().item()
                        writer.scalar("valid_loss", valid_loss, batch_i)

                        if valid_loss < best:
                            best = valid_loss
                            print(f"new best {best}")
                            writer.blip("new_best", batch_i)
                            if save_best:
                                save_model_parameters(
                                    model,
                                    optimiser=optimiser,
                                    model_name=model_name,
                                    save_directory=save_path,
                                )
                E.set_description(
                    f"Epoch number {epoch}, Current train loss {train_loss}, "
                    f"valid loss {valid_loss}, valid_accuracy {valid_accuracy}"
                )

    return model
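# A minimal usage sketch for ``train_siamese``. ``MySiameseNet``,
# ``my_contrastive_loss``, and the paths below are hypothetical stand-ins, not
# names from this repository; they only illustrate how the keyword-only
# arguments above fit together.
if __name__ == "__main__":
    from pathlib import Path

    from torch.optim import Adam

    model = MySiameseNet().to(global_torch_device())  # hypothetical Module returning pair distances
    trained = train_siamese(
        model,
        Adam(model.parameters(), lr=3e-4),
        my_contrastive_loss,  # hypothetical callable(distances, is_diff) -> loss tensor
        train_number_epochs=10,
        data_dir=Path("data/pairs"),  # hypothetical dataset root
        train_batch_size=32,
        model_name="siamese_baseline",
        save_path=Path("checkpoints"),
        save_best=True,
        img_size=(100, 100),
        validation_interval=50,
    )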