Example #1
    def fit(self, train_loader: DataLoader, val_loader: DataLoader, model_manager: ModelManager,
            max_epoch: Optional[int] = None, max_step: Optional[int] = None,
            stop_patience: Optional[int] = 2, steps_betw_evals: int = 200):

        model_manager.reset_model_weights()
        num_train_steps = self._calc_num_train_steps(max_epoch, max_step, len(train_loader))
        self.weights_updater.prepare_for_fit(model_manager, num_train_steps)

        losses = []
        while True:
            for batch in tqdm(train_loader, mininterval=1):
                if self._early_stopping(max_epoch, max_step, stop_patience):
                    return
                loss_val = self.weights_updater.fit_with_batch(model_manager, batch)
                losses.append(loss_val)

                # Evaluate and checkpoint periodically, then report and reset
                # the running mean of the training loss.
                if (self.step_nb + 1) % steps_betw_evals == 0:
                    del batch
                    self._eval_save_if_need(model_manager, val_loader)
                    print("Mean losses:", np.mean(losses))
                    losses = []

                self.step_nb += 1
            self.epoch_nb += 1
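
For context, a minimal sketch of how this fit method might be driven. Trainer and get_weights_updater are assumed names for illustration; only fit's signature is taken from the snippet above.

    # Hypothetical usage; Trainer and get_weights_updater are assumed names,
    # while std_objects.get_model_manager appears in Example #5 below.
    trainer = Trainer(weights_updater=std_objects.get_weights_updater())
    manager = std_objects.get_model_manager()
    trainer.fit(train_loader, val_loader, manager,
                max_step=2000,          # hard cap on training steps
                stop_patience=2,        # stop after 2 evals without improvement
                steps_betw_evals=200)   # evaluate/checkpoint every 200 steps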
Example #2
    def _calc_loss(self, manager: ModelManager, inputs,
                   labels) -> torch.Tensor:
        with amp.autocast(enabled=self.use_amp):
            preds, labels = manager.preproc_forward(inputs, labels)
            loss = self.criterion(preds.view(-1, 2), labels.view(-1).long())
        # Scale down so that gradients accumulated over accum_iters
        # micro-batches match one full-batch update.
        loss = loss / self.accum_iters
        return loss
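
The division by accum_iters implies gradient accumulation, and autocast implies mixed precision. A minimal sketch of the matching update step follows; fit_with_batch is called from Example #1, but its body and the scaler/step attributes here are assumptions:

    # Assumed AMP + gradient-accumulation step; only the name fit_with_batch
    # is confirmed by Example #1.
    def fit_with_batch(self, manager, batch):
        inputs, labels = batch[:-1], batch[-1]
        loss = self._calc_loss(manager, inputs, labels)
        self.scaler.scale(loss).backward()    # scale to avoid fp16 underflow
        self.batch_nb += 1
        if self.batch_nb % self.accum_iters == 0:
            self.scaler.step(self.optimizer)  # unscales, then optimizer.step()
            self.scaler.update()
            self.optimizer.zero_grad()
            self.lr_scheduler.step()
        return loss.item()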
Example #3
def get_model_manager(model=None, device=None) -> ModelManager:
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if model is None:
        model = get_model()
    data_processor = get_rucos_processor()

    return ModelManager(model, data_processor, device=device)
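
A short usage note: both arguments are optional, so callers can take the defaults or inject their own parts, e.g. to pin inference to CPU:

    # Default: fresh model + RuCoS processor, CUDA when available.
    manager = get_model_manager()
    # Explicit CPU variant, e.g. for debugging.
    cpu_manager = get_model_manager(device=torch.device("cpu"))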
Example #4
    def _create_manager(self, run_model_kwargs,
                        run_proc_kwargs) -> ModelManager:
        # Per-run kwargs override the standard defaults.
        model_kwargs = self.std_model_kwargs.copy()
        model_kwargs.update(run_model_kwargs)
        proc_kwargs = self.std_processor_kwargs.copy()
        proc_kwargs.update(run_proc_kwargs)

        model = SentPairBinaryClassifier(self.mname, **model_kwargs)
        proc = RucosProcessor(self.mname, **proc_kwargs)

        return ModelManager(model, proc, device=self.device)
Example #5
    def test_save_then_load(self):
        saver = std_objects.get_local_saver()
        model_name = shared_objs.mod_manager.save_model(saver)
        self.assertIsInstance(model_name, str)
        # Drop the in-memory objects so load() must restore from disk.
        del shared_objs.model
        del shared_objs.mod_manager
        loaded_manager = ModelManager.load(saver, model_name)
        self.assertIsInstance(loaded_manager, ModelManager)
        # Recreate the shared fixtures for the tests that follow.
        shared_objs.model = std_objects.get_model()
        shared_objs.mod_manager = std_objects.get_model_manager(
            model=shared_objs.model)
Example #6
    def prepare_for_fit(self, model_manager: ModelManager,
                        nb_train_steps: int):
        # Two param groups: the transformer body uses the base lr,
        # the classification head its own lr_head.
        self.optimizer = optim.AdamW(
            [{'params': model_manager.get_model().transformer.parameters()},
             {'params': model_manager.get_model().head.parameters(),
              'lr': self.lr_head}],
            lr=self.lr,
            weight_decay=self.weight_decay)

        # One scheduler step per effective optimizer step; guard against
        # zero steps when nb_train_steps < accum_iters.
        total_steps = max(nb_train_steps // self.accum_iters, 1)
        self.lr_scheduler = transformers.get_polynomial_decay_schedule_with_warmup(
            optimizer=self.optimizer,
            num_warmup_steps=self.warmup,
            num_training_steps=total_steps,
            lr_end=self.lr_end)
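
For context, the scheduler above is sized in effective optimizer steps (total_steps), so it should be stepped once per accum_iters micro-batches, not once per batch. The loop below is illustrative, not taken from the repo:

    # Illustrative pairing of optimizer and scheduler steps under accumulation.
    for micro_step in range(nb_train_steps):
        loss = compute_loss()                # hypothetical helper
        (loss / accum_iters).backward()
        if (micro_step + 1) % accum_iters == 0:
            optimizer.step()
            lr_scheduler.step()              # polynomial decay toward lr_end
            optimizer.zero_grad()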
Example #7
    def _get_placeholders_probs_dataframe(self, manager: ModelManager, loader: DataLoader) -> pd.DataFrame:
        res = {
            'idx': [],
            'probs': [],
            'start': [],
            'end': [],
            'placeholder': []
        }
        for text1, text2, idx, start, end, placeholder in tqdm(loader, mininterval=1):
            idx, probs, start, end, placeholder = manager.predict_postproc((text1, text2, idx, start, end, placeholder))
            res['idx'].extend(idx)

            # Keep the positive-class probability; fall back to a -999
            # sentinel when the prediction has an unexpected shape.
            if probs.ndim > 1:
                res['probs'].extend(probs[:, 1].tolist())
            else:
                try:
                    res['probs'].extend([probs[1]])
                except Exception:
                    res['probs'].extend([-999])

            res['start'].extend(start)
            res['end'].extend(end)
            res['placeholder'].extend(placeholder)
        return pd.DataFrame(data=res)
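
For a cloze-style task such as RuCoS, a dataframe like this is typically reduced to one answer per query by taking the highest-probability candidate. The column names below come from the snippet; the aggregation itself is an assumption:

    import pandas as pd

    # Assumed post-processing: keep, for each query idx, the placeholder
    # with the highest positive-class probability.
    def pick_best_placeholders(df: pd.DataFrame) -> pd.DataFrame:
        best_rows = df.loc[df.groupby('idx')['probs'].idxmax()]
        return best_rows[['idx', 'start', 'end', 'placeholder']]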
Example #8
    def load_best_manager(self) -> ModelManager:
        if self.best_model_name is None:
            raise ValueError("Model was not saved")
        return ModelManager.load(self.saver, self.best_model_name)
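
A closing sketch of how best_model_name might get set: presumably inside _eval_save_if_need (called in Example #1), the manager is saved whenever the validation metric improves. The body below is an assumption; only save_model(saver) and the attribute names are confirmed elsewhere in the listing:

    # Assumed counterpart that records the best checkpoint name.
    def _eval_save_if_need(self, model_manager, val_loader):
        metric = self._evaluate(model_manager, val_loader)  # hypothetical helper
        if self.best_metric is None or metric > self.best_metric:
            self.best_metric = metric
            self.best_model_name = model_manager.save_model(self.saver)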