def _save_model(self, save_path: str, model: PreTrainedModel, tokenizer: PreTrainedTokenizer,
                iteration: int, optimizer: Optimizer = None, save_as_best: bool = False,
                extra: dict = None, include_iteration: bool = True, name: str = 'model'):
    """Save a model checkpoint (weights, tokenizer and extra training state).

    Args:
        save_path: Base directory under which the checkpoint directory is created.
        model: Model to save; a ``DataParallel`` wrapper is unwrapped first.
        tokenizer: Tokenizer saved alongside the model.
        iteration: Current training iteration, stored in ``extra.state``.
        optimizer: If given, its ``state_dict`` is stored so training can resume.
        save_as_best: If True, save into ``<name>_best`` instead of an
            iteration-numbered directory.
        extra: Optional additional entries merged into the saved state dict.
        include_iteration: If True (and not ``save_as_best``), append the
            iteration number to the directory name.
        name: Base name for the checkpoint directory.
    """
    extra_state = dict(iteration=iteration)

    # Explicit None check: optimizers are stateful objects, not flags.
    if optimizer is not None:
        extra_state['optimizer'] = optimizer.state_dict()
    if extra:
        extra_state.update(extra)

    if save_as_best:
        dir_path = os.path.join(save_path, '%s_best' % name)
    else:
        dir_name = '%s_%s' % (name, iteration) if include_iteration else name
        dir_path = os.path.join(save_path, dir_name)

    util.create_directories_dir(dir_path)

    # save model — unwrap DataParallel so the weights are saved without the wrapper
    if isinstance(model, DataParallel):
        model.module.save_pretrained(dir_path)
    else:
        model.save_pretrained(dir_path)

    # save vocabulary
    tokenizer.save_pretrained(dir_path)

    # save extra state (iteration, optimizer state, caller-supplied entries)
    state_path = os.path.join(dir_path, 'extra.state')
    torch.save(extra_state, state_path)
def save_checkpoint(
    logger: lavd.Logger,
    model: PreTrainedModel,
    tokeniser: PreTrainedTokenizer,
    stats: Dict,
    step: int,
):
    """Persist training stats, model weights and tokeniser for *step*.

    The checkpoint directory is derived from where the logger stores the
    stats object. Does nothing when the logger is disabled.
    """
    if logger.disabled:
        return

    logger.save_obj(stats, "stats", step=step)
    # Save the model next to the stats file for this step.
    stats_path = logger.get_file_path("stats", step=step, extension=".pt")
    out_dir = stats_path.parent
    out_dir.mkdir(parents=True, exist_ok=True)
    model.save_pretrained(out_dir)
    tokeniser.save_pretrained(out_dir)
def update_tracker(
    self,
    new_value: float,
    epoch: int,
    model: PreTrainedModel,
    tokenizer: PreTrainedTokenizer,
    model_output_dir: str,
):
    """Record *new_value* as the new best metric and checkpoint the model.

    Uses ``self.comparison_func`` to decide whether *new_value* beats the
    current ``self.best_value``; if not, this is a no-op.
    """
    improved = self.comparison_func(
        old_value=self.best_value, new_value=new_value
    )
    if not improved:
        return

    logging.info("Saving new best model")
    self.best_epoch = epoch
    self.best_value = new_value
    model.save_pretrained(model_output_dir)
    tokenizer.save_pretrained(model_output_dir)
def train_model(df: pd.DataFrame, tokenizer: transformers.PreTrainedTokenizer,
                model: transformers.PreTrainedModel, steps: int, batch_size: int,
                save_path: str) -> None:
    """Fine-tune *model* on random mini-batches sampled from *df*.

    Args:
        df: Training data; must provide 'articles' (text) and 'labels'
            columns — assumes labels are numeric, TODO confirm dtype.
        tokenizer: Tokenizer used to encode the sampled articles.
        model: Model to train; moved to GPU when available.
        steps: Number of optimisation steps to run.
        batch_size: Rows sampled from *df* per step.
        save_path: Directory the trained model is saved to at the end.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.train()

    optim = AdamW(model.parameters(), lr=5e-5)
    losses = []

    for step in trange(steps):
        optim.zero_grad()

        sample = df.sample(batch_size)
        X = sample['articles'].tolist()
        y = sample['labels'].tolist()

        inputs = tokenizer(X, return_tensors='pt', padding=True, truncation=True)
        input_ids = inputs['input_ids'].to(device)
        attention_mask = inputs['attention_mask'].to(device)
        labels = torch.tensor(y).unsqueeze(1).to(device)

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        # FIX: store the detached scalar, not the tensor — appending `loss`
        # itself keeps every step's autograd graph alive and leaks memory.
        losses.append(loss.item())

        if (step + 1) % 100 == 0:
            avg_loss = sum(losses) / len(losses)
            print(f'Step: {step + 1} Loss: {avg_loss}')
            send_message(f'Step: {step + 1} Loss: {avg_loss}')
            losses = []

        loss.backward()
        optim.step()

    model.save_pretrained(save_path)