def __init__(self, cbs):
    # Minimal experiment stub: wires a CallbackRunner (from the library under
    # test) to this object so callbacks can read and mutate its state.
    self.stop_training = False
    self._model_state = None
    self.cb = CallbackRunner(cbs)
    self.cb.set_experiment(self)
    self.exp_logs = {}
import time

# `CallbackRunner` comes from the library under test.


class DummyPipeline:
    """Minimal stand-in for an experiment, used to exercise callbacks."""

    def __init__(self, cbs):
        self._stop_training = False
        self._model_state = None
        self.save_dir = None
        self.history = cbs[0]
        self.cb = CallbackRunner(cbs)
        self.cb.set_experiment(self)
        self.exp_logs = {}

    @property
    def set_model_state(self):
        return self._model_state

    @set_model_state.setter
    def set_model_state(self, state):
        # Assigning a state name dispatches that event to every registered callback.
        self._model_state = state
        if self.cb is not None:
            self.cb(current_state=self._model_state)

    def fit(self):
        # Fabricated metrics that improve monotonically, so monitoring
        # callbacks always have something to react to.
        train_loss = 0.01
        val_loss = 0.02
        train_acc = 10
        val_acc = 10
        self.set_model_state = "on_experiment_start"
        for epoch in range(10):
            start = time.time()
            train_loss += 0.01
            val_loss += 0.02
            train_acc += 0.2
            val_acc += 0.2
            time.sleep(0.1)  # Simulate the epoch taking some wall-clock time.
            logs = {
                "Epoch": epoch,
                "train_loss": train_loss,
                "val_loss": val_loss,
                "val_acc": val_acc,
                "train_acc": train_acc,
                "Time": (time.time() - start),
            }
            self.exp_logs.update(logs)
            self.set_model_state = "on_epoch_end"
            if self._stop_training:
                break
        self.set_model_state = "on_experiment_end"
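# A minimal, self-contained sketch of the pattern DummyPipeline relies on:
# assigning to a property dispatches an event to registered callbacks. The
# `EventRunner` and `PrintingCallback` below are hypothetical stand-ins, not
# the real CallbackRunner API; they only illustrate the property-setter
# dispatch under that assumption.


class EventRunner:
    """Hypothetical runner that calls a same-named method on each callback."""

    def __init__(self, callbacks):
        self.callbacks = callbacks

    def __call__(self, current_state):
        for cb in self.callbacks:
            getattr(cb, current_state, lambda: None)()


class PrintingCallback:
    def on_epoch_end(self):
        print("epoch finished")


class Owner:
    def __init__(self):
        self._model_state = None
        self.cb = EventRunner([PrintingCallback()])

    @property
    def set_model_state(self):
        return self._model_state

    @set_model_state.setter
    def set_model_state(self, state):
        # The setter is the event hook: storing the state also fires it.
        self._model_state = state
        self.cb(current_state=state)


# Owner().set_model_state = "on_epoch_end"  # prints "epoch finished"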
import os
from typing import List

import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader, TensorDataset

# Library-internal names assumed importable from elsewhere in the package:
# CallbackRunner, ProgressBar, History, sort_callbacks, metric_utils,
# get_optimizer, get_criterion, AvgLoss, numpy_to_torch, seed_all.


class BaseState:
    """Class to set user and internal variables along with some utility functions."""

    def __init__(
        self,
        num_epochs: int,
        fp16: bool,
        device: str,
        seed: int = 42,
    ):
        """Init method to set up important variables for training and validation.

        Args:
            num_epochs: The number of epochs to train the model.
            fp16: Set this to True if you want to use mixed precision training (Default: False).
            device: The device on which you want to train your model.
            seed: The seed to ensure reproducibility.

        Note:
            If batch_mixers are used, set compute_train_metrics to False.
            Also, only validation metrics will be computed if special batch_mixers are used.
        """
        self.num_epochs = num_epochs
        self.fp16 = fp16
        self.device = device
        self.seed = seed
        self.train_key, self.val_key, self.epoch_key = "train_", "val_", "Epoch"
        self.scaler = torch.cuda.amp.GradScaler() if self.fp16 else None
        self.model = None
        self.resume_checkpoint = None
        self.main_metric = None
        self.stop_training = None
        self.experiment_state = None
        self._callback_runner = None
        self.optimizer = None
        self.criterion = None
        self.exp_logs = None
        self.history = None
        self._train_monitor, self._val_monitor = None, None
        self._metric_runner = None
        self.train_dl, self.valid_dl = None, None
        self.loss, self.loss_meter = None, None
        self.x, self.y = None, None
        self.preds = None
        self.is_training = None
        self.metrics = None
        self.batch_idx, self.current_epoch = None, 0
        self._step, self._iterator = None, None
        self.plot_dir = "./plots"

    def get_prefix(self):
        """Generates the prefix for training and validation.

        Returns:
            The prefix for training or validation.
        """
        return self.train_key if self.is_training else self.val_key

    def _set_callbacks(self, callbacks: List):
        default_callbacks = [ProgressBar(), History()]
        if callbacks is not None:
            default_callbacks.extend(callbacks)
        default_callbacks = sort_callbacks(default_callbacks)
        self._callback_runner = CallbackRunner(callbacks=default_callbacks)
        self._callback_runner.set_experiment(self)

    def _set_metrics(self, metrics):
        if metrics is not None:
            self._metric_runner = metric_utils.MetricContainer(metrics=metrics)
            self._metric_runner.set_experiment(self)

    def _set_params(self, optimizer_params):
        # Allow the user to pass explicit parameter groups; otherwise collect
        # every parameter that requires gradients.
        if "model_params" in optimizer_params:
            grad_params = optimizer_params.pop("model_params")
        else:
            grad_params = (param for param in self.model.parameters() if param.requires_grad)
        return grad_params

    def _set_optimizer(self, optimizer, optimizer_params):
        grad_params = self._set_params(optimizer_params=optimizer_params)
        if isinstance(optimizer, str):
            self.optimizer = get_optimizer(optimizer)(grad_params, **optimizer_params)
        else:
            self.optimizer = optimizer

    def _set_criterion(self, criterion):
        self.criterion = get_criterion(criterion=criterion)
        self.loss_meter = AvgLoss()
        self.loss_meter.set_experiment(self)

    @staticmethod
    def _dataloader_from_data(args, dataloader_kwargs):
        args = numpy_to_torch(args)
        dataset = TensorDataset(*args) if len(args) > 1 else args[0]
        return DataLoader(dataset, **dataloader_kwargs)

    def _model_to_device(self):
        """Function to move model to device."""
        if next(self.model.parameters()).is_cuda is False:
            self.model.to(self.device)

    def _reset_model_logs(self):
        if bool(self.exp_logs):
            self.exp_logs = None

    def initialise(self):
        """Method to initialise the experiment: seed everything, move the model to the device, and reset logs."""
        seed_all(self.seed)
        self._model_to_device()
        self._reset_model_logs()

    def cleanup(self):
        """Method to clean up internal variables."""
        self._train_monitor, self._val_monitor, self.exp_logs = None, None, {}
        self.experiment_state = None
        self.loss, self.x, self.y, self.preds = None, None, None, None
        self.batch_idx, self.current_epoch = None, None
        self._step, self._iterator = None, None

    def _create_plots(self, key):
        for k, v in self.history.items():
            if key in k:
                plt.plot(self.history.get(self.epoch_key), v, "-o", label=k)
        plt.title(f"{key.upper()}/{self.epoch_key.upper()}", fontweight="bold")
        plt.ylabel(f"{key.upper()}", fontweight="bold")
        plt.xlabel(self.epoch_key.upper(), fontweight="bold")
        plt.grid(True)
        plt.legend(loc="upper left")

    def _save_fig(self, save: bool, key: str):
        if save:  # `save` is a bool; the previous `is not None` check was always true.
            if not os.path.exists(self.plot_dir):
                os.mkdir(self.plot_dir)
            save_path = os.path.join(self.plot_dir, f"{key}-vs-{self.epoch_key.lower()}.jpg")
            plt.savefig(save_path, dpi=150)

    def plot_history(self, keys: List[str], plot_fig: bool = True, save_fig: bool = False):
        """Method to plot model history.

        Args:
            keys: A list of lower-case keys, e.g. "accuracy" or "loss".
            plot_fig: Whether to display the figure.
            save_fig: Set to True if you want to save the figure.
        """
        for key in keys:
            plt.style.use("seaborn")
            f = plt.figure()
            self._create_plots(key=key)
            self._save_fig(save=save_fig, key=key)
            if plot_fig:
                plt.show()
            plt.close(f)
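# A minimal sketch of what `_dataloader_from_data` does, with the library's
# `numpy_to_torch` helper replaced by an explicit conversion (an assumption
# about its behaviour): numpy arrays become tensors, paired tensors become a
# TensorDataset, and a DataLoader is built from the supplied kwargs.

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

x = np.random.rand(8, 4).astype("float32")
y = np.random.randint(0, 2, size=(8,))

tensors = [torch.from_numpy(arr) for arr in (x, y)]  # stands in for numpy_to_torch
dataset = TensorDataset(*tensors)  # more than one array -> zipped into one dataset
loader = DataLoader(dataset, batch_size=4, shuffle=True)

for xb, yb in loader:
    print(xb.shape, yb.shape)  # torch.Size([4, 4]) torch.Size([4])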
import os

import torch

# `CallbackRunner` and `LoadCheckpoint` come from the library under test.


class DummyPipeline:
    """Minimal stand-in for an experiment, used to test the checkpoint callbacks."""

    def __init__(self, model, cbs):
        self.model = model  # Dummy model, just to check that the checkpoint callback works.
        self.scheduler = None
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=3e-4)
        self._stop_training = False
        self._model_state = None
        self.cb = CallbackRunner([cbs])
        self.device = "cpu"
        self.cb.set_experiment(self)
        self.exp_logs = {}
        self.epoch_key = "Epoch"
        self.path = cbs.path
        self.scheduler_stepper = None
        # Separate runner for LoadCheckpoint, since the model is tested only
        # after training is done.
        self.cb_lc = CallbackRunner([LoadCheckpoint(self.path)])
        self.cb_lc.set_experiment(self)

    @property
    def set_model_state(self):
        return self._model_state

    @set_model_state.setter
    def set_model_state(self, state):
        self._model_state = state
        if self.cb is not None:
            self.cb(current_state=self._model_state)

    def fit(self):
        train_loss = 0.01
        val_loss = 0.02
        train_acc = 10
        val_acc = 10
        self.set_model_state = "on_experiment_start"
        for epoch in range(10):
            train_loss += 0.01
            val_loss -= 0.02  # Validation loss keeps improving, so a checkpoint is written every epoch.
            train_acc += 0.2
            val_acc += 0.2
            logs = {
                "Epoch": epoch,
                "train_loss": train_loss,
                "val_loss": val_loss,
                "val_acc": val_acc,
                "train_acc": train_acc,
            }
            self.exp_logs.update(logs)
            self.set_model_state = "on_epoch_end"
            if self._stop_training:
                break
        self.set_model_state = "on_experiment_end"
        assert os.path.exists(self.path)

    def check_checkpoints(self):
        assert os.path.exists(self.path)
        ckpt = torch.load(self.path)
        model_dict = self.model.state_dict()
        # Fire LoadCheckpoint, then verify the restored weights match the file.
        self.cb_lc(current_state="on_experiment_start")
        for layer_name, weight in ckpt["model_state_dict"].items():
            assert layer_name in model_dict
            assert torch.all(model_dict[layer_name] == weight)
        assert self.optimizer.state_dict() == ckpt["optimizer_state_dict"]
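# A self-contained sketch of the round trip that `check_checkpoints` verifies,
# using only torch. The checkpoint layout mirrors the keys the test reads
# ("model_state_dict" and "optimizer_state_dict"); the file name is arbitrary.

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=3e-4)

path = "checkpoint.pt"
torch.save(
    {"model_state_dict": model.state_dict(), "optimizer_state_dict": optimizer.state_dict()},
    path,
)

ckpt = torch.load(path)
model_dict = model.state_dict()
for layer_name, weight in ckpt["model_state_dict"].items():
    # Every saved layer must exist in the live model with identical weights.
    assert layer_name in model_dict
    assert torch.all(model_dict[layer_name] == weight)
# SGD state here contains only plain Python values, so dict equality is safe.
assert optimizer.state_dict() == ckpt["optimizer_state_dict"]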