def fit(self,
        train_loader: DataLoader,
        val_loader: DataLoader,
        model_manager: ModelManager,
        max_epoch: Union[None, int] = None,
        max_step: Union[None, int] = None,
        stop_patience: Union[None, int] = 2,
        steps_betw_evals: int = 200):
    model_manager.reset_model_weights()
    num_train_steps = self._calc_num_train_steps(max_epoch, max_step, len(train_loader))
    self.weights_updater.prepare_for_fit(model_manager, num_train_steps)
    losses = []
    while True:
        for batch in tqdm(train_loader, mininterval=1):
            if self._early_stopping(max_epoch, max_step, stop_patience):
                return
            loss_val = self.weights_updater.fit_with_batch(model_manager, batch)
            losses.append(loss_val)
            if (self.step_nb + 1) % steps_betw_evals == 0:
                # free the current batch before evaluation to release memory
                del batch
                self._eval_save_if_need(model_manager, val_loader)
                print("Mean losses:", np.mean(losses))
                losses = []
            self.step_nb += 1
        self.epoch_nb += 1
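# Usage sketch for the training entry point above. Assumptions: `fit` lives on a
# trainer-like object constructed elsewhere in the repo, the caller supplies that
# object and the two DataLoaders, and `load_best_manager` (defined at the end of
# this section) belongs to the same object; none of this is confirmed by the code
# shown here, and the keyword values are illustrative.
def run_training(trainer, train_loader: DataLoader, val_loader: DataLoader) -> ModelManager:
    manager = get_model_manager()  # factory defined later in this section
    trainer.fit(train_loader, val_loader, manager,
                max_epoch=3,
                stop_patience=2,
                steps_betw_evals=200)   # evaluate and log mean loss every 200 steps
    return trainer.load_best_manager()  # restore the best checkpoint saved during fit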
def _calc_loss(self, manager: ModelManager, inputs, labels) -> torch.Tensor:
    with amp.autocast(enabled=self.use_amp):
        preds, labels = manager.preproc_forward(inputs, labels)
        # binary classification: logits reshaped to (N, 2), labels flattened to (N,)
        loss = self.criterion(preds.view(-1, 2), labels.view(-1).long())
    # scale so that gradients accumulated over accum_iters micro-batches average out
    loss = loss / self.accum_iters
    return loss
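# Minimal sketch of how the `accum_iters` scaling above is typically consumed in a
# gradient-accumulation loop. The names `model`, `criterion`, `optimizer` and
# `loader` are illustrative; this is not the repo's actual update loop.
def accumulation_sketch(model, criterion, optimizer, loader, accum_iters: int = 4):
    optimizer.zero_grad()
    for i, (inputs, labels) in enumerate(loader):
        loss = criterion(model(inputs), labels) / accum_iters  # same scaling as in _calc_loss
        loss.backward()                     # gradients sum across the accumulated micro-batches
        if (i + 1) % accum_iters == 0:
            optimizer.step()                # one optimizer update per accum_iters micro-batches
            optimizer.zero_grad()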
def get_model_manager(model=None, device=None) -> ModelManager:
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if model is None:
        model = get_model()
    data_processor = get_rucos_processor()
    return ModelManager(model, data_processor, device=device)
def _create_manager(self, run_model_kwargs, run_proc_kwargs) -> ModelManager:
    # start from the standard kwargs and let run-specific values override them
    model_kwargs = self.std_model_kwargs.copy()
    model_kwargs.update(run_model_kwargs)
    proc_kwargs = self.std_processor_kwargs.copy()
    proc_kwargs.update(run_proc_kwargs)
    model = SentPairBinaryClassifier(self.mname, **model_kwargs)
    proc = RucosProcessor(self.mname, **proc_kwargs)
    return ModelManager(model, proc, device=self.device)
def test_save_then_load(self):
    saver = std_objects.get_local_saver()
    model_name = shared_objs.mod_manager.save_model(saver)
    del shared_objs.model
    self.assertIsInstance(model_name, str)

    del shared_objs.mod_manager
    loaded_manager = ModelManager.load(saver, model_name)
    self.assertIsInstance(loaded_manager, ModelManager)

    # recreate the shared objects deleted above so that later tests can reuse them
    shared_objs.model = std_objects.get_model()
    shared_objs.mod_manager = std_objects.get_model_manager(model=shared_objs.model)
def prepare_for_fit(self, model_manager: ModelManager, nb_train_steps: int):
    # separate learning rates for the transformer body and the classification head
    self.optimizer = optim.AdamW(
        [{'params': model_manager.get_model().transformer.parameters()},
         {'params': model_manager.get_model().head.parameters(), 'lr': self.lr_head}],
        lr=self.lr,
        weight_decay=self.weight_decay)
    # the scheduler is stepped once per optimizer step, not per micro-batch
    total_steps = nb_train_steps // self.accum_iters
    total_steps = max(total_steps, 1)  # guard against nb_train_steps < self.accum_iters
    self.lr_scheduler = transformers.get_polynomial_decay_schedule_with_warmup(
        optimizer=self.optimizer,
        num_warmup_steps=self.warmup,
        num_training_steps=total_steps,
        lr_end=self.lr_end)
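# Worked example of the scheduler-horizon arithmetic in prepare_for_fit (the
# numbers are illustrative, not taken from the repo's configs): the LR scheduler
# advances once per optimizer step, so its horizon is the micro-batch count
# divided by accum_iters, floored and clamped to at least 1.
nb_train_steps = 10_000            # micro-batches seen during training (assumed)
accum_iters = 4                    # micro-batches per optimizer step (assumed)
total_steps = max(nb_train_steps // accum_iters, 1)
assert total_steps == 2_500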
def _get_placeholders_probs_dataframe(self, manager: ModelManager,
                                      loader: DataLoader) -> pd.DataFrame:
    res = {'idx': [], 'probs': [], 'start': [], 'end': [], 'placeholder': []}
    for text1, text2, idx, start, end, placeholder in tqdm(loader, mininterval=1):
        idx, probs, start, end, placeholder = manager.predict_postproc(
            (text1, text2, idx, start, end, placeholder))
        res['idx'].extend(idx)
        if probs.ndim > 1:
            # batched predictions: keep the probability of the positive class
            res['probs'].extend(probs[:, 1].tolist())
        else:
            # single prediction: probs is 1-D; fall back to a sentinel if it is malformed
            try:
                res['probs'].extend([probs[1]])
            except Exception:
                res['probs'].extend([-999])
        res['start'].extend(start)
        res['end'].extend(end)
        res['placeholder'].extend(placeholder)
    return pd.DataFrame(data=res)
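# Sketch of consuming the DataFrame built above: for each query idx keep the
# candidate placeholder with the highest positive-class probability. That this is
# how predictions are aggregated downstream is an assumption, and the toy rows
# below are illustrative only.
import pandas as pd

toy = pd.DataFrame({
    'idx': [0, 0, 1],
    'probs': [0.2, 0.9, 0.6],
    'start': [3, 10, 5],
    'end': [7, 14, 9],
    'placeholder': ['Moscow', 'Kyiv', 'Berlin'],
})
best = toy.loc[toy.groupby('idx')['probs'].idxmax()]
print(best[['idx', 'placeholder', 'start', 'end']])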
def load_best_manager(self) -> ModelManager:
    if self.best_model_name is None:
        raise ValueError("Model was not saved")
    return ModelManager.load(self.saver, self.best_model_name)