    max_position_embeddings=514,
    num_attention_heads=12,
    num_hidden_layers=6,
    type_vocab_size=1,
)

from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained(
    "F:/PycharmProjects/zajecia/spyder/semantic/textsemantic/content/KantaiBERT",
    max_length=512)

from transformers import RobertaForMaskedLM

model = RobertaForMaskedLM(config=config)
# print(model)

LP = list(model.parameters())
lp = len(LP)
print(lp)
for p in range(0, lp):
    print(LP[p])

np = 0
for p in range(0, lp):  # number of tensors
    PL2 = True
    try:
        L2 = len(LP[p][0])  # check if 2D
    except:
        L2 = 1  # not 2D but 1D
        PL2 = False
    L1 = len(LP[p])
    L3 = L1 * L2
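# Cross-check sketch (not part of the original listing): the per-tensor count
# above can be verified in one pass with Tensor.numel(); assumes `model` is the
# RobertaForMaskedLM instance built above.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"total parameters: {total_params:,} | trainable: {trainable_params:,}")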
class LightningRoberta(LightningModule):

    def __init__(self, config, pretrained_model_path=None):
        super().__init__()
        self.config = config
        if pretrained_model_path is None or pretrained_model_path == '':
            self.model = RobertaForMaskedLM(self.config)
        else:
            self.model = RobertaForMaskedLM.from_pretrained(
                pretrained_model_path, config=self.config)

    def set_learning_rate(self, lr):
        self.learning_rate = lr

    def set_mask_token_id(self, mask_id):
        self.mask_token_id = mask_id

    def forward(self, input_ids, attention_mask, labels=None):
        return self.model(input_ids, attention_mask=attention_mask, labels=labels)

    def training_step(self, batch, batch_idx):
        outputs = self(batch['input_ids'],
                       attention_mask=batch['attention_mask'],
                       labels=batch['labels'])
        masked_token_acc = masked_token_accuracy(outputs.logits,
                                                 batch['input_ids'],
                                                 batch['labels'],
                                                 self.mask_token_id)
        self.log("train_loss", outputs.loss, on_step=True, on_epoch=True,
                 prog_bar=True, logger=True)
        self.log("train_mask_acc", masked_token_acc, on_step=True, on_epoch=True,
                 prog_bar=True, logger=True)
        return {'loss': outputs.loss, 'masked_token_acc': masked_token_acc}

    def validation_step(self, batch, batch_idx):
        outputs = self(batch['input_ids'],
                       attention_mask=batch['attention_mask'],
                       labels=batch['labels'])
        masked_token_acc = masked_token_accuracy(outputs.logits,
                                                 batch['input_ids'],
                                                 batch['labels'],
                                                 self.mask_token_id)
        self.log_dict(
            {'val_loss': outputs.loss, 'val_mask_acc': masked_token_acc},
            on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': outputs.loss, 'masked_token_acc': masked_token_acc}

    def configure_optimizers(self):
        return torch.optim.AdamW(self.model.parameters(), lr=self.learning_rate)

    def init_metrics(self):
        self.train_results = {}
        self.val_results = {}
        # self.train_metrics_log = {}
        # self.val_metrics_log = {}

    def set_ckpt_folder(self, ckpt_folder):
        self.ckpt_folder = ckpt_folder

    def set_start_epoch(self, epoch_num):
        self.start_epoch = epoch_num

    def training_epoch_end(self, training_step_outputs):
        # self.train_metrics_log[f'epoch_{self.start_epoch+self.current_epoch+1}'] = training_step_outputs
        total_loss = torch.tensor(
            0,
            dtype=training_step_outputs[0]['loss'].dtype,
            device=training_step_outputs[0]['loss'].device)
        total_acc = torch.tensor(
            0,
            dtype=training_step_outputs[0]['masked_token_acc'].dtype,
            device=training_step_outputs[0]['masked_token_acc'].device)
        for step_output in training_step_outputs:
            total_loss += step_output['loss']
            total_acc += step_output['masked_token_acc']
        self.train_results[f'epoch_{self.start_epoch+self.current_epoch+1}'] = {
            'loss': total_loss / len(training_step_outputs),
            'mask_acc': total_acc / len(training_step_outputs)
        }

    def validation_epoch_end(self, validation_step_outputs):
        # self.val_metrics_log[f'epoch_{self.start_epoch+self.current_epoch+1}'] = validation_step_outputs
        total_loss = torch.tensor(
            0,
            dtype=validation_step_outputs[0]['loss'].dtype,
            device=validation_step_outputs[0]['loss'].device)
        total_acc = torch.tensor(
            0,
            dtype=validation_step_outputs[0]['masked_token_acc'].dtype,
            device=validation_step_outputs[0]['masked_token_acc'].device)
        for step_output in validation_step_outputs:
            total_loss += step_output['loss']
            total_acc += step_output['masked_token_acc']
        self.val_results[f'epoch_{self.start_epoch+self.current_epoch+1}'] = {
            'loss': total_loss / len(validation_step_outputs),
            'mask_acc': total_acc / len(validation_step_outputs)
        }

    def on_train_epoch_end(self):
        completed_epoch = self.start_epoch + self.current_epoch + 1
        print_str = ">>>>"
        print_str += " train_loss: " + str(
            round(self.train_results[f'epoch_{completed_epoch}']['loss'].item(), 4))
        print_str += " | train_mask_acc: " + str(
            round(self.train_results[f'epoch_{completed_epoch}']['mask_acc'].item(), 4))
        print_str += " | val_loss: " + str(
            round(self.val_results[f'epoch_{completed_epoch}']['loss'].item(), 4))
        print_str += " | val_mask_acc: " + str(
            round(self.val_results[f'epoch_{completed_epoch}']['mask_acc'].item(), 4))
        ckpt_path = os.path.join(self.ckpt_folder, f"epoch-{completed_epoch}")
        self.model.save_pretrained(ckpt_path)
        print(print_str)
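# --- Illustrative usage sketch (not from the original listing) ---
# Shows one way LightningRoberta could be driven by a pytorch_lightning Trainer.
# Assumptions: `config`, `tokenizer`, `train_dataloader`, and `val_dataloader`
# already exist; the folder name and hyperparameters are placeholders; the
# *_epoch_end hooks above imply a pre-2.0 pytorch_lightning release.
from pytorch_lightning import Trainer

lit_model = LightningRoberta(config)
lit_model.set_learning_rate(1e-4)                     # placeholder learning rate
lit_model.set_mask_token_id(tokenizer.mask_token_id)
lit_model.init_metrics()
lit_model.set_ckpt_folder("checkpoints")              # placeholder folder
lit_model.set_start_epoch(0)

trainer = Trainer(max_epochs=3, accelerator="auto")   # placeholder settings
trainer.fit(lit_model, train_dataloader, val_dataloader)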
    intermediate_size=Config['intermediate_size'],
    type_vocab_size=Config['type_vocab_size'])

if Config['last_ckpt_path'] is None or Config['last_ckpt_path'] == '':
    model = RobertaForMaskedLM(config)
else:
    model = RobertaForMaskedLM.from_pretrained(Config['last_ckpt_path'],
                                               config=config)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# and move our model over to the selected device
model.to(device)
model.train()  # activate training mode

optim = torch.optim.AdamW(model.parameters(), lr=Config['learning_rate'])

dt_str = train_start_datetime.strftime("D%Y_%m_%d_T%H_%M_%S")
model_folder = os.path.join(Config['model_path'], dt_str)
ckpt_path = os.path.join(model_folder, 'checkpoints')
config_path = os.path.join(model_folder, 'train_config.json')
results_path = os.path.join(model_folder, 'results.json')

start_epoch = Config['start_epoch']
end_epoch = start_epoch + Config['num_epochs']
results = {}

for epoch in range(start_epoch, end_epoch):
    # setup loop with TQDM and dataloader
    loop = tqdm(train_dataloader, leave=True)
    total_train_loss = 0.0
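    # Illustrative note (not part of the original listing, which continues with
    # its own loop body): a typical masked-LM step over `loop` zeroes the
    # gradients, moves the batch tensors to `device`, runs the model with
    # labels so it returns a loss, backpropagates, and steps the optimizer:
    #
    #   for batch in loop:
    #       optim.zero_grad()
    #       input_ids = batch['input_ids'].to(device)
    #       attention_mask = batch['attention_mask'].to(device)
    #       labels = batch['labels'].to(device)
    #       outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
    #       outputs.loss.backward()
    #       optim.step()
    #       total_train_loss += outputs.loss.item()
    #       loop.set_postfix(loss=outputs.loss.item())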