Example #1
def on_end(self, data: Data) -> None:
    # Log the total training time and the final learning rate of every model that has an optimizer.
    data.write_with_log(
        "total_time",
        "{} sec".format(round(time.perf_counter() - self.train_start, 2)))
    for model in self.system.network.models:
        if hasattr(model, "current_optimizer"):
            data.write_with_log(model.model_name + "_lr", get_lr(model))
Example #2
def on_batch_end(self, data: Data) -> None:
    # When training with an ARC scheduler, accumulate the per-step training loss.
    if self.system.mode == "train" and isinstance(self.lr_fn, ARC):
        self.lr_fn.accumulate_single_train_loss(data[min(self.model.loss_name)].numpy())
    # Log the current learning rate on the first step and at every logging interval.
    if self.system.mode == "train" and self.system.log_steps and (
            self.system.global_step % self.system.log_steps == 0 or self.system.global_step == 1):
        current_lr = np.float32(get_lr(self.model))
        data.write_with_log(self.outputs[0], current_lr)
Example #3
def on_epoch_end(self, data: Data) -> None:
    # During evaluation, accumulate the eval loss for ARC and gather the eval losses every `frequency` epochs.
    if self.system.mode == "eval" and isinstance(self.lr_fn, ARC):
        self.lr_fn.accumulate_single_eval_loss(data[min(self.model.loss_name)])
        if self.system.epoch_idx % self.lr_fn.frequency == 0:
            self.lr_fn.gather_multiple_eval_losses()
    # During training, record the current learning rate and gather the training losses every `frequency` epochs.
    if (self.system.mode == "train" and isinstance(self.lr_fn, ARC)
            and self.system.epoch_idx % self.lr_fn.frequency == 0):
        self.lr_fn.accumulate_all_lrs(get_lr(model=self.model))
        self.lr_fn.gather_multiple_train_losses()
Example #4
def on_epoch_end(self, data: Data) -> None:
    # Reset the patience counter whenever the monitored value improves.
    if self.monitor_op(data[self.inputs[0]], self.best):
        self.best = data[self.inputs[0]]
        self.wait = 0
    else:
        # After `patience` epochs without improvement, scale the LR by `factor`
        # (never going below `min_lr`) and log the new value.
        self.wait += 1
        if self.wait >= self.patience:
            new_lr = max(self.min_lr, np.float32(self.factor * get_lr(self.model)))
            set_lr(self.model, new_lr)
            self.wait = 0
            data.write_with_log(self.outputs[0], new_lr)
            print("FastEstimator-ReduceLROnPlateau: learning rate reduced to {}".format(new_lr))
Example #5
def on_epoch_begin(self, data: Data) -> None:
    if self.system.mode == "train" and self.schedule_mode == "epoch":
        if isinstance(self.lr_fn, ARC):
            # ARC predicts a new LR multiplier every `frequency` epochs (after the first epoch).
            if self.system.epoch_idx > 1 and (self.system.epoch_idx % self.lr_fn.frequency == 1
                                              or self.lr_fn.frequency == 1):
                multiplier = self.lr_fn.predict_next_multiplier()
                new_lr = np.float32(get_lr(model=self.model) * multiplier)
                set_lr(self.model, new_lr)
                print("FastEstimator-ARC: Multiplying LR by {}".format(multiplier))
        else:
            # Otherwise evaluate the user-supplied schedule function at the current epoch index.
            new_lr = np.float32(self.lr_fn(self.system.epoch_idx))
            set_lr(self.model, new_lr)
Example #6
# Imports assumed by this standalone snippet; get_lr comes from fastestimator.backend.
import os
import pickle
from typing import Optional, Union

import tensorflow as tf
import tensorflow_addons as tfa
import torch


def save_model(model: Union[tf.keras.Model, torch.nn.Module],
               save_dir: str,
               model_name: Optional[str] = None,
               save_optimizer: bool = False):
    """Save `model` weights to a specific directory.

    This method can be used with TensorFlow models:
    ```python
    m = fe.build(fe.architecture.tensorflow.LeNet, optimizer_fn="adam")
    fe.backend.save_model(m, save_dir="/tmp", model_name="test")  # Generates 'test.h5' file inside /tmp directory
    ```

    This method can be used with PyTorch models:
    ```python
    m = fe.build(fe.architecture.pytorch.LeNet, optimizer_fn="adam")
    fe.backend.save_model(m, save_dir="/tmp", model_name="test")  # Generates 'test.pt' file inside /tmp directory
    ```

    Args:
        model: A neural network instance to save.
        save_dir: Directory into which to write the `model` weights.
        model_name: The name of the model (used for naming the weights file). If None, model.model_name will be used.
        save_optimizer: Whether to also save the optimizer. If True, the optimizer will be saved in a separate
            file in the same folder.

    Returns:
        The saved model path.

    Raises:
        ValueError: If `model` is an unacceptable data type.
    """
    assert hasattr(model, "fe_compiled") and model.fe_compiled, "model must be built by fe.build"
    if model_name is None:
        model_name = model.model_name
    save_dir = os.path.normpath(save_dir)
    os.makedirs(save_dir, exist_ok=True)
    if isinstance(model, tf.keras.Model):
        model_path = os.path.join(save_dir, "{}.h5".format(model_name))
        model.save_weights(model_path)
        if save_optimizer:
            assert model.current_optimizer, "optimizer does not exist"
            optimizer_path = os.path.join(save_dir,
                                          "{}_opt.pkl".format(model_name))
            with open(optimizer_path, 'wb') as f:
                saved_data = {
                    'weights': model.current_optimizer.get_weights(),
                    'lr': get_lr(model)
                }
                # Also record the weight decay value for tfa decoupled-weight-decay optimizers,
                # whether the optimizer itself or its wrapped inner optimizer provides it.
                if isinstance(model.current_optimizer,
                              tfa.optimizers.DecoupledWeightDecayExtension) or (
                                  hasattr(model.current_optimizer, "inner_optimizer")
                                  and isinstance(model.current_optimizer.inner_optimizer,
                                                 tfa.optimizers.DecoupledWeightDecayExtension)):
                    saved_data['weight_decay'] = tf.keras.backend.get_value(
                        model.current_optimizer.weight_decay)
                pickle.dump(saved_data, f)
        return model_path
    elif isinstance(model, torch.nn.Module):
        model_path = os.path.join(save_dir, "{}.pt".format(model_name))
        torch.save(model.state_dict(), model_path)
        if save_optimizer:
            assert model.current_optimizer, "optimizer does not exist"
            optimizer_path = os.path.join(save_dir,
                                          "{}_opt.pt".format(model_name))
            torch.save(model.current_optimizer.state_dict(), optimizer_path)
        return model_path
    else:
        raise ValueError("Unrecognized model instance {}".format(type(model)))
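
A minimal usage sketch of the `save_optimizer` path described in the docstring above, assuming the same FastEstimator setup (`fe.build` with the bundled LeNet) as the docstring examples:

# Sketch only: the file names follow the save_model code above.
m = fe.build(fe.architecture.tensorflow.LeNet, optimizer_fn="adam")
path = fe.backend.save_model(m, save_dir="/tmp", model_name="test", save_optimizer=True)
# path == "/tmp/test.h5"; the optimizer state is pickled alongside it as "/tmp/test_opt.pkl"
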
Example #7
def on_batch_end(self, data: Data) -> None:
    # Log the current learning rate on the first step and at every logging interval.
    if self.system.log_steps and (self.system.global_step % self.system.log_steps == 0
                                  or self.system.global_step == 1):
        current_lr = np.float32(get_lr(self.model))
        data.write_with_log(self.outputs[0], current_lr)