Ejemplo n.º 1
0
  def update_targets(
      self, copy_percentage: float = 0.005, *, metric_writer: Writer = None
      ) -> None:
    """

Interpolation factor in polyak averaging for target networks. Target networks are updated towards main
networks according to:

\theta_{\text{targ}} \leftarrow
\rho \theta_{\text{targ}} + (1-\rho) \theta

where \rho is polyak. (Always between 0 and 1, usually close to 1.)

@param metric_writer:
@type metric_writer:
@param copy_percentage:
@return:
"""
    if metric_writer:
      metric_writer.blip("Target Models Synced", self.update_i)

    update_target(
        target_model=self.critic_1_target,
        source_model=self.critic_1,
        copy_percentage=copy_percentage,
        )

    update_target(
        target_model=self.critic_2_target,
        source_model=self.critic_2,
        copy_percentage=copy_percentage,
        )
Ejemplo n.º 2
0
    def _update(self, *, metric_writer: Writer = MockWriter()) -> None:
        """

@param metric_writer:
@return:
"""

        loss_ = math.inf

        if self.update_i > self._initial_observation_period:
            if is_zero_or_mod_zero(self._learning_frequency, self.update_i):
                if len(self._memory_buffer) > self._batch_size:
                    transitions = self._memory_buffer.sample(self._batch_size)

                    td_error, Q_expected, Q_state = self._td_error(transitions)
                    td_error = td_error.detach().squeeze(-1).cpu().numpy()

                    if self._use_per:
                        self._memory_buffer.update_last_batch(td_error)

                    loss = self._loss_function(Q_state, Q_expected)

                    self._optimiser.zero_grad()
                    loss.backward()
                    self.post_process_gradients(self.value_model.parameters())
                    self._optimiser.step()

                    loss_ = to_scalar(loss)
                    if metric_writer:
                        metric_writer.scalar("td_error", td_error.mean(),
                                             self.update_i)
                        metric_writer.scalar("loss", loss_, self.update_i)

                    if self._scheduler:
                        self._scheduler.step()
                        if metric_writer:
                            for i, param_group in enumerate(
                                    self._optimiser.param_groups):
                                metric_writer.scalar(f"lr{i}",
                                                     param_group["lr"],
                                                     self.update_i)

                else:
                    logging.info(
                        "Batch size is larger than current memory size, skipping update"
                    )

            if self._double_dqn:
                if is_zero_or_mod_zero(self._sync_target_model_frequency,
                                       self.update_i):
                    update_target(
                        target_model=self._target_value_model,
                        source_model=self.value_model,
                        copy_percentage=self._copy_percentage,
                    )
                if metric_writer:
                    metric_writer.blip("Target Model Synced", self.update_i)

        return loss_
Ejemplo n.º 3
0
    def _update_targets(self,
                        copy_percentage: float,
                        *,
                        metric_writer: Writer = None) -> None:
        """

@param copy_percentage:
@return:
"""
        if metric_writer:
            metric_writer.blip("Target Model Synced", self.update_i)

        update_target(
            target_model=self._target_actor_critic,
            source_model=self.actor_critic,
            copy_percentage=copy_percentage,
        )
Ejemplo n.º 4
0
    def update_targets(self,
                       update_percentage: float,
                       *,
                       metric_writer: Writer = None) -> None:
        """

@param update_percentage:
@return:
"""
        with torch.no_grad():
            if metric_writer:
                metric_writer.blip("Target Model Synced", self.update_i)

            update_target(
                target_model=self._target_critic,
                source_model=self._critic,
                copy_percentage=update_percentage,
            )
            update_target(
                target_model=self._target_actor,
                source_model=self._actor,
                copy_percentage=update_percentage,
            )
Ejemplo n.º 5
0
def train_siamese(
    model: Module,
    optimiser: Optimizer,
    criterion: callable,
    *,
    writer: Writer = MockWriter(),
    train_number_epochs: int,
    data_dir: Path,
    train_batch_size: int,
    model_name: str,
    save_path: Path,
    save_best: bool = False,
    img_size: Tuple[int, int],
    validation_interval: int = 1,
):
    """Train a siamese model on image pairs with a contrastive criterion.

Runs training batches, validating every ``validation_interval`` batches and
optionally saving the model whenever a new best validation loss is seen.

:param model: siamese network taking two image tensors
:param optimiser: optimiser over ``model``'s parameters
:param criterion: callable(loss_input, is_diff_labels) -> loss tensor
:param writer: metric sink for train/valid scalars and best-model blips
:param train_number_epochs: number of epochs to train
:param data_dir: root directory of the pair dataset
:param train_batch_size: batch size for both train and validation loaders
:param model_name: name used when saving model parameters
:param save_path: directory model parameters are saved to
:param save_best: whether to save parameters on each new best valid loss
:param img_size: (height, width) images are resized to
:param validation_interval: run validation every this many batches
:return: the trained model
"""

    def _pair_loader(split: Split) -> DataLoader:
        """Build a shuffled grayscale pair loader for the given split."""
        return DataLoader(
            PairDataset(
                data_path=data_dir,
                transform=transforms.Compose([
                    transforms.Grayscale(),
                    transforms.Resize(img_size),
                    transforms.ToTensor(),
                ]),
                split=split,
            ),
            shuffle=True,
            num_workers=4,
            batch_size=train_batch_size,
        )

    train_dataloader = _pair_loader(Split.Training)
    valid_dataloader = _pair_loader(Split.Validation)

    best = math.inf
    # Defined up front so the progress description never hits a NameError
    # before the first training batch / first validation pass.
    train_loss = valid_loss = valid_accuracy = math.nan

    E = tqdm(range(train_number_epochs))
    batch_counter = count()

    for epoch in E:
        for tss in train_dataloader:
            batch_i = next(batch_counter)
            with TorchTrainSession(model):
                o = [t.to(global_torch_device()) for t in tss]
                optimiser.zero_grad()
                loss_contrastive = criterion(model(*o[:2]),
                                             o[2].to(dtype=torch.float))
                loss_contrastive.backward()
                optimiser.step()
                train_loss = loss_contrastive.cpu().item()
                writer.scalar("train_loss", train_loss, batch_i)
            # BUG FIX: the original advanced the counter a SECOND time here
            # (batch_counter.__next__()), so the interval check only ever saw
            # odd values — validation never ran for any even interval and
            # batch indices skipped every other value.
            if batch_i % validation_interval == 0:
                with TorchEvalSession(model):
                    for tsv in valid_dataloader:
                        ov = [t.to(global_torch_device()) for t in tsv]
                        v_o, fact = model(*ov[:2]), ov[2].to(dtype=torch.float)
                        valid_loss = criterion(v_o, fact).cpu().item()
                        valid_accuracy = (accuracy(distances=v_o,
                                                   is_diff=fact).cpu().item())
                        writer.scalar("valid_loss", valid_loss, batch_i)
                        if valid_loss < best:
                            best = valid_loss
                            print(f"new best {best}")
                            writer.blip("new_best", batch_i)
                            if save_best:
                                save_model_parameters(
                                    model,
                                    optimiser=optimiser,
                                    model_name=model_name,
                                    save_directory=save_path,
                                )
            E.set_description(
                f"Epoch number {epoch}, Current train loss {train_loss}, valid loss {valid_loss}, valid_accuracy {valid_accuracy}"
            )

    return model