def on_begin(self, data: Data) -> None:
    if fe.fe_deterministic_seed is not None:
        raise RuntimeError("You cannot use RestoreWizard while in deterministic training mode since a restored"
                           " training can't guarantee that all prngs will be reset to exactly the same position")
    if not self.should_restore():
        self._cleanup(self.dirs)  # Remove any partially completed checkpoints
        print("FastEstimator-RestoreWizard: Backing up to {}".format(self.directory))
    else:
        self._load_key()
        directory = self.dirs[self.dir_idx]
        self.system.load_state(directory)
        data.write_with_log("epoch", self.system.epoch_idx)
        print("FastEstimator-RestoreWizard: Restoring from {}, resume training".format(directory))
        self.dir_idx = int(not self.dir_idx)  # Flip the idx so that next save goes to other dir
        self._cleanup(self.dirs[self.dir_idx])  # Clean out the other dir in case it had a partial save
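# The restore scheme above alternates between two save directories so that a crash
# mid-save can never corrupt the only surviving checkpoint. A minimal self-contained
# sketch of the A/B flip (directory names and the loop are illustrative assumptions,
# not from the source):
dirs = ["/tmp/restore/A", "/tmp/restore/B"]
dir_idx = 0
for checkpoint in range(4):
    save_dir = dirs[dir_idx]  # Write the new checkpoint here
    dir_idx = int(not dir_idx)  # Flip so the next save targets the other directory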
def on_batch_end(self, data: Data) -> None: if self.system.mode == "train" and isinstance(self.lr_fn, ARC): self.lr_fn.accumulate_single_train_loss(data[min(self.model.loss_name)].numpy()) if self.system.mode == "train" and self.system.log_steps and (self.system.global_step % self.system.log_steps == 0 or self.system.global_step == 1): current_lr = np.float32(get_lr(self.model)) data.write_with_log(self.outputs[0], current_lr)
def on_end(self, data: Data) -> None:
    data.write_with_log("total_time", "{} sec".format(round(time.perf_counter() - self.train_start, 2)))
    # Record the final learning rate of every model that has an optimizer attached
    for model in self.system.network.models:
        if hasattr(model, "current_optimizer"):
            data.write_with_log(model.model_name + "_lr", get_lr(model))
def on_epoch_end(self, data: Data) -> None:
    for key, ds_vals in self.test_results.items():
        for ds_id, vals in ds_vals.items():
            if ds_id != '':
                # Log a per-dataset average under that dataset's id
                d = DSData(ds_id, data)
                d.write_with_log(key, np.mean(np.array(vals), axis=0))
        # Log the overall average pooled across every dataset's values
        data.write_with_log(key, np.mean(np.array([e for x in ds_vals.values() for e in x]), axis=0))
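# The overall value above is the mean over the flattened per-batch values rather than
# the mean of the per-dataset means; with unevenly sized datasets the two differ. A
# toy illustration (numbers are made up):
import numpy as np

ds_vals = {"ds1": [1.0, 1.0, 1.0, 1.0], "ds2": [0.0, 0.0]}
print(np.mean([np.mean(v) for v in ds_vals.values()]))    # 0.5 (mean of per-dataset means)
print(np.mean([e for x in ds_vals.values() for e in x]))  # 0.666... (pooled, as above)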
def on_batch_end(self, data: Data) -> None:
    if self.system.log_steps and (self.system.global_step % self.system.log_steps == 0
                                  or self.system.global_step == 1):
        for key in self.inputs:
            if key in data:
                data.write_with_log(key, data[key])
        if self.system.global_step > 1:  # Step 1 has no preceding timing window to report
            self.elapse_times.append(time.perf_counter() - self.step_start)
            data.write_with_log("steps/sec", round(self.system.log_steps / np.sum(self.elapse_times), 2))
        self.elapse_times = []
        self.step_start = time.perf_counter()
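# steps/sec above is the logging interval divided by the wall-clock time accumulated
# since the previous logging point. A toy check (timings are illustrative):
import numpy as np

log_steps = 100
elapse_times = [1.2, 1.3]  # Seconds measured between the last log point and this one
print(round(log_steps / np.sum(elapse_times), 2))  # 40.0 steps/sec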
def on_begin(self, data: Data) -> None:
    if not os.path.exists(self.directory) or not os.listdir(self.directory):
        print("FastEstimator-RestoreWizard: Backing up in {}".format(self.directory))
    else:
        self._scan_files()
        self._load_files()
        data.write_with_log("epoch", self.system.epoch_idx)
        print("FastEstimator-RestoreWizard: Restoring from {}, resume training".format(self.directory))
def on_epoch_end(self, data: Data) -> None:
    if self.monitor_op(data[self.inputs[0]], self.best):
        self.best = data[self.inputs[0]]
        self.wait = 0
    else:
        self.wait += 1
        if self.wait >= self.patience:
            new_lr = max(self.min_lr, np.float32(self.factor * get_lr(self.model)))
            set_lr(self.model, new_lr)
            self.wait = 0
            data.write_with_log(self.outputs[0], new_lr)
            print("FastEstimator-ReduceLROnPlateau: learning rate reduced to {}".format(new_lr))
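# A sketch of how `monitor_op` and `best` are plausibly initialized for the comparison
# above (the constructor details are an assumption, not shown in the source): with a
# "min" objective, an epoch "improves" when the metric is strictly lower than the best.
import numpy as np

best_mode = "min"
monitor_op, best = (np.less, np.inf) if best_mode == "min" else (np.greater, -np.inf)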
def on_epoch_end(self, data: Data) -> None:
    if self.binary_classification:
        score = f1_score(self.y_true, self.y_pred, average='binary', **self.kwargs)
    else:
        # Multi-class case: report one f1 score per class rather than a single average
        score = f1_score(self.y_true, self.y_pred, average=None, **self.kwargs)
    data.write_with_log(self.outputs[0], score)
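# The `average` switch above maps onto sklearn's f1_score semantics: 'binary' returns a
# single score for the positive class, while None returns one score per class. A
# self-contained check:
from sklearn.metrics import f1_score

y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
print(f1_score(y_true, y_pred, average='binary'))  # 0.8 (class 1 only)
print(f1_score(y_true, y_pred, average=None))      # [0.8, 0.8] (one score per class)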
def on_epoch_end(self, data: Data) -> None:
    if self.monitor_op(data[self.metric], self.best):
        self.best = data[self.metric]
        self.since_best = 0
        if self.save_dir:
            self.model_path = save_model(self.model, self.save_dir, self.model_name)
            print("FastEstimator-BestModelSaver: Saved model to {}".format(self.model_path))
    else:
        self.since_best += 1
    data.write_with_log(self.outputs[0], self.since_best)
    data.write_with_log(self.outputs[1], self.best)
def on_epoch_end(self, data: Data) -> None:
    self.y_true = np.squeeze(np.stack(self.y_true))
    self.y_pred = np.stack(self.y_pred)
    mid = round(cal.get_calibration_error(probs=self.y_pred, labels=self.y_true, mode=self.method), 4)
    low = None
    high = None
    if self.confidence_interval is not None:
        low, _, high = cal.get_calibration_error_uncertainties(probs=self.y_pred,
                                                               labels=self.y_true,
                                                               mode=self.method,
                                                               alpha=self.confidence_interval)
        low = round(low, 4)
        high = round(high, 4)
    data.write_with_log(self.outputs[0], ValWithError(low, mid, high) if low is not None else mid)
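# `cal` above refers to the `uncertainty-calibration` package (imported as
# `calibration`), whose API the calls follow. A minimal sketch with random toy data
# (the mode string and alpha value are assumptions matching the usage above):
import calibration as cal
import numpy as np

probs = np.random.dirichlet(np.ones(3), size=200)  # 200 softmax-like rows over 3 classes
labels = np.random.randint(0, 3, size=200)
mid = cal.get_calibration_error(probs=probs, labels=labels, mode='top-label')
low, _, high = cal.get_calibration_error_uncertainties(probs=probs, labels=labels, mode='top-label', alpha=0.05)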
def on_epoch_end(self, data: Data) -> None:
    data.write_with_log(self.outputs[0], self.correct / self.total)
def on_epoch_end(self, data: Data) -> None:
    for key, value_list in self.test_results.items():
        data.write_with_log(key, np.mean(np.array(value_list), axis=0))
def on_epoch_end(self, data: Data) -> None:
    if self.system.log_steps:
        self.elapse_times.append(time.perf_counter() - self.step_start)
        data.write_with_log("epoch_time", "{} sec".format(round(time.perf_counter() - self.epoch_start, 2)))
def on_epoch_end(self, data: Data) -> None:
    data.write_with_log(self.outputs[0], matthews_corrcoef(y_true=self.y_true, y_pred=self.y_pred))
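# matthews_corrcoef is sklearn's implementation; a quick sanity check that perfect
# agreement scores 1.0 and perfect disagreement scores -1.0:
from sklearn.metrics import matthews_corrcoef

print(matthews_corrcoef(y_true=[0, 1, 0, 1], y_pred=[0, 1, 0, 1]))  # 1.0
print(matthews_corrcoef(y_true=[0, 1, 0, 1], y_pred=[1, 0, 1, 0]))  # -1.0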
def on_begin(self, data: Data) -> None:
    self.train_start = time.perf_counter()
    data.write_with_log("num_device", self.system.num_devices)
    data.write_with_log("logging_interval", self.system.log_steps)
def on_epoch_end(self, data: Data) -> None:
    data.write_with_log(self.outputs[0], round(self.get_corpus_bleu_score(), 5))
def on_epoch_end(self, data: Data) -> None:
    data.write_with_log(self.outputs[0], self.matrix)
def on_batch_end(self, data: Data) -> None:
    if self.system.log_steps and (self.system.global_step % self.system.log_steps == 0
                                  or self.system.global_step == 1):
        current_lr = np.float32(get_lr(self.model))
        data.write_with_log(self.outputs[0], current_lr)
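# The gating expression above logs step 1 and then every log_steps-th global step. A
# self-contained sketch of which steps pass the check (the log_steps value is illustrative):
log_steps = 100
print([s for s in range(1, 501) if s % log_steps == 0 or s == 1])  # [1, 100, 200, 300, 400, 500]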