def on_end(self, data: Data) -> None: data.write_with_log( "total_time", "{} sec".format(round(time.perf_counter() - self.train_start, 2))) for model in self.system.network.models: if hasattr(model, "current_optimizer"): data.write_with_log(model.model_name + "_lr", get_lr(model))
def on_batch_end(self, data: Data) -> None: if self.system.mode == "train" and isinstance(self.lr_fn, ARC): self.lr_fn.accumulate_single_train_loss(data[min(self.model.loss_name)].numpy()) if self.system.mode == "train" and self.system.log_steps and (self.system.global_step % self.system.log_steps == 0 or self.system.global_step == 1): current_lr = np.float32(get_lr(self.model)) data.write_with_log(self.outputs[0], current_lr)
def on_epoch_end(self, data: Data) -> None: if self.system.mode == "eval" and isinstance(self.lr_fn, ARC): self.lr_fn.accumulate_single_eval_loss(data[min(self.model.loss_name)]) if self.system.epoch_idx % self.lr_fn.frequency == 0: self.lr_fn.gather_multiple_eval_losses() if self.system.mode == "train" and isinstance(self.lr_fn, ARC) and self.system.epoch_idx % self.lr_fn.frequency == 0: self.lr_fn.accumulate_all_lrs(get_lr(model=self.model)) self.lr_fn.gather_multiple_train_losses()
def on_epoch_end(self, data: Data) -> None:
    # If the monitored metric improved, reset the patience counter; otherwise reduce the LR once
    # `patience` epochs have passed without improvement.
    if self.monitor_op(data[self.inputs[0]], self.best):
        self.best = data[self.inputs[0]]
        self.wait = 0
    else:
        self.wait += 1
        if self.wait >= self.patience:
            new_lr = max(self.min_lr, np.float32(self.factor * get_lr(self.model)))
            set_lr(self.model, new_lr)
            self.wait = 0
            data.write_with_log(self.outputs[0], new_lr)
            print("FastEstimator-ReduceLROnPlateau: learning rate reduced to {}".format(new_lr))
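# Usage sketch for the trace above: halve the learning rate whenever the monitored eval loss stalls
# for 5 epochs. This is a minimal, hypothetical wiring; the import path and the exact constructor
# keywords (metric, patience, factor, min_lr) are assumptions about the public API, not taken from
# the code above. The resulting `traces` list would be passed to fe.Estimator(..., traces=traces).
import fastestimator as fe
from fastestimator.trace.adapt import ReduceLROnPlateau

model = fe.build(model_fn=fe.architecture.tensorflow.LeNet, optimizer_fn="adam")
traces = [ReduceLROnPlateau(model=model, metric="ce", patience=5, factor=0.5, min_lr=1e-6)]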
def on_epoch_begin(self, data: Data) -> None: if self.system.mode == "train" and self.schedule_mode == "epoch": if isinstance(self.lr_fn, ARC): if self.system.epoch_idx > 1 and (self.system.epoch_idx % self.lr_fn.frequency == 1 or self.lr_fn.frequency == 1): multiplier = self.lr_fn.predict_next_multiplier() new_lr = np.float32(get_lr(model=self.model) * multiplier) set_lr(self.model, new_lr) print("FastEstimator-ARC: Multiplying LR by {}".format(multiplier)) else: new_lr = np.float32(self.lr_fn(self.system.epoch_idx)) set_lr(self.model, new_lr)
def save_model(model: Union[tf.keras.Model, torch.nn.Module],
               save_dir: str,
               model_name: Optional[str] = None,
               save_optimizer: bool = False):
    """Save `model` weights to a specific directory.

    This method can be used with TensorFlow models:
    ```python
    m = fe.build(fe.architecture.tensorflow.LeNet, optimizer_fn="adam")
    fe.backend.save_model(m, save_dir="/tmp", model_name="test")  # Generates 'test.h5' file inside /tmp directory
    ```

    This method can be used with PyTorch models:
    ```python
    m = fe.build(fe.architecture.pytorch.LeNet, optimizer_fn="adam")
    fe.backend.save_model(m, save_dir="/tmp", model_name="test")  # Generates 'test.pt' file inside /tmp directory
    ```

    Args:
        model: A neural network instance to save.
        save_dir: Directory into which to write the `model` weights.
        model_name: The name of the model (used for naming the weights file). If None, model.model_name will be used.
        save_optimizer: Whether to save the optimizer state. If True, the optimizer will be saved in a separate file
            in the same folder.

    Returns:
        The path to the saved model file.

    Raises:
        ValueError: If `model` is an unacceptable data type.
    """
    assert hasattr(model, "fe_compiled") and model.fe_compiled, "model must be built by fe.build"
    if model_name is None:
        model_name = model.model_name
    save_dir = os.path.normpath(save_dir)
    os.makedirs(save_dir, exist_ok=True)
    if isinstance(model, tf.keras.Model):
        model_path = os.path.join(save_dir, "{}.h5".format(model_name))
        model.save_weights(model_path)
        if save_optimizer:
            assert model.current_optimizer, "optimizer does not exist"
            optimizer_path = os.path.join(save_dir, "{}_opt.pkl".format(model_name))
            with open(optimizer_path, 'wb') as f:
                saved_data = {'weights': model.current_optimizer.get_weights(), 'lr': get_lr(model)}
                # Optimizers with decoupled weight decay (directly or via a wrapped inner optimizer)
                # also need their weight_decay value saved.
                if isinstance(model.current_optimizer, tfa.optimizers.DecoupledWeightDecayExtension) or hasattr(
                        model.current_optimizer, "inner_optimizer") and isinstance(
                            model.current_optimizer.inner_optimizer, tfa.optimizers.DecoupledWeightDecayExtension):
                    saved_data['weight_decay'] = tf.keras.backend.get_value(model.current_optimizer.weight_decay)
                pickle.dump(saved_data, f)
        return model_path
    elif isinstance(model, torch.nn.Module):
        model_path = os.path.join(save_dir, "{}.pt".format(model_name))
        torch.save(model.state_dict(), model_path)
        if save_optimizer:
            assert model.current_optimizer, "optimizer does not exist"
            optimizer_path = os.path.join(save_dir, "{}_opt.pt".format(model_name))
            torch.save(model.current_optimizer.state_dict(), optimizer_path)
        return model_path
    else:
        raise ValueError("Unrecognized model instance {}".format(type(model)))
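# Usage sketch: save weights together with the optimizer state, then restore them later.
# `fe.backend.load_model` is assumed to be the loading counterpart, and its keyword names
# (weights_path, load_optimizer) are assumptions, not taken from the function above.
import fastestimator as fe

m = fe.build(fe.architecture.pytorch.LeNet, optimizer_fn="adam")
path = fe.backend.save_model(m, save_dir="/tmp", model_name="test", save_optimizer=True)  # /tmp/test.pt (+ test_opt.pt)
fe.backend.load_model(m, weights_path=path, load_optimizer=True)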
def on_batch_end(self, data: Data) -> None:
    if self.system.log_steps and (self.system.global_step % self.system.log_steps == 0
                                  or self.system.global_step == 1):
        current_lr = np.float32(get_lr(self.model))
        data.write_with_log(self.outputs[0], current_lr)