def __init__(self) -> None:
    super().__init__()
    self.optim = AdamW
    self.scheduler = ReduceLROnPlateau
    self.model = AutoModel.from_pretrained(
        Const.MODEL_NAME,
        num_labels=Label("REL").count,
        return_dict=True,
    )
    self.tokenizer = AutoTokenizer.from_pretrained(
        Const.MODEL_NAME, add_prefix_space=True
    )
    self.tokenizer.add_special_tokens(
        {"additional_special_tokens": Const.SPECIAL_TOKENS}
    )
    # Resize the embedding matrix to cover the newly added special tokens.
    self.model.resize_token_embeddings(len(self.tokenizer))
    hidden_size = self.model.config.hidden_size
    # Distil* configs expose `dropout` rather than `hidden_dropout_prob`,
    # so fall back to 0 for those models.
    dropout = (
        0
        if Const.MODEL_NAME.startswith("distil")
        else self.model.config.hidden_dropout_prob
    )
    self.cls_fc_layer = FCLayer(hidden_size, hidden_size, dropout)
    self.entity_fc_layer = FCLayer(hidden_size, hidden_size, dropout)
    self.label_classifier = FCLayer(
        hidden_size * 3,
        Label("REL").count,
        dropout,
        use_activation=False,
    )

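# `FCLayer` is referenced above but not shown in this section. A minimal
# sketch of what it plausibly looks like, assuming the R-BERT-style
# dropout -> tanh -> linear arrangement described in the relation model's
# forward pass below; only the call signature (input_dim, output_dim,
# dropout_rate, use_activation) is taken from the call sites above, the
# body is an assumption.
from torch import Tensor, nn


class FCLayer(nn.Module):
    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        dropout_rate: float = 0.0,
        use_activation: bool = True,
    ) -> None:
        super().__init__()
        self.dropout = nn.Dropout(dropout_rate)
        self.tanh = nn.Tanh()
        self.linear = nn.Linear(input_dim, output_dim)
        self.use_activation = use_activation

    def forward(self, x: Tensor) -> Tensor:
        # Dropout -> optional tanh -> linear projection.
        x = self.dropout(x)
        if self.use_activation:
            x = self.tanh(x)
        return self.linear(x)
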
def update(self, preds: torch.Tensor, targets: torch.Tensor) -> None:
    pred_biluo = []
    target_biluo = []
    for i, _ in enumerate(targets):
        # Keep only positions with real labels; -100 marks ignored subword tokens.
        true_labels_idx: list = [
            idx for idx, lab in enumerate(targets[i]) if lab != -100
        ]
        pred_biluo.append(
            [Label("GER").idx[pred.item()] for pred in preds[i, true_labels_idx]]
        )
        target_biluo.append(
            [
                Label("GER").idx[target.item()]
                for target in targets[i, true_labels_idx]
            ]
        )
    report: dict = classification_report(
        y_true=target_biluo,
        y_pred=pred_biluo,
        mode="strict",
        scheme=BILOU,
        output_dict=True,
        zero_division=1,
    )
    # Use `.device` rather than `.get_device()`: the latter is only defined
    # for CUDA tensors and would break on CPU.
    self.f1s = torch.cat(
        (
            self.f1s,
            torch.tensor(
                [report.pop("micro avg")["f1-score"]], device=self.f1s.device
            ),
        )
    )

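# For context: the class around the `update` method above is not shown.
# A minimal scaffolding sketch, assuming a `torchmetrics.Metric` subclass;
# the state name `f1s` comes from the code above, everything else here
# (default, reduction, the `compute` body) is an assumption.
import torch
from torchmetrics import Metric


class Seqeval(Metric):
    def __init__(self) -> None:
        super().__init__()
        # One micro-averaged F1 per processed batch; update() concatenates
        # new scores onto this tensor.
        self.add_state("f1s", default=torch.tensor([]), dist_reduce_fx="cat")

    def compute(self) -> torch.Tensor:
        # Average the per-batch micro F1 scores.
        return self.f1s.mean()
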
def test_step(self, batch: tdict, batch_idx: int) -> Tensor:
    step_out = self.step(batch, batch_idx)
    loss = step_out["loss"]
    test_f1 = f1(
        step_out["probs"],
        batch["labels"],
        num_classes=Label("REL").count,
    )
    self.log_dict({"test_loss": loss, "test_f1": test_f1})
    return loss

def __init__(self, path: Path, tokenizer=AutoTokenizer) -> None:
    super().__init__()
    data: pd.DataFrame = pd.read_csv(path)
    # Map relation names to their integer ids.
    data["relation"] = data["relation"].apply(lambda x: Label("REL").labels[x])
    tokenizer = tokenizer.from_pretrained(Const.MODEL_NAME)
    tokenizer.add_special_tokens(
        {"additional_special_tokens": Const.SPECIAL_TOKENS}
    )
    self.data: list[dict] = [
        convert_input(row, Const.MAX_TOKEN_LEN, tokenizer)
        for row in data.to_dict(orient="records")
    ]
    # Drop rows that convert_input could not encode (returned None).
    self.data = list(filter(None, self.data))

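# A usage sketch for the dataset above; the class name `RelationDataset`
# and the CSV path are assumptions, and the batch keys follow whatever
# dict `convert_input` builds.
from pathlib import Path

from torch.utils.data import DataLoader

dataset = RelationDataset(Path("data/relations.csv"))  # hypothetical name/path
loader = DataLoader(dataset, batch_size=16, shuffle=True)
for batch in loader:
    # Each batch is the dict built by convert_input, e.g. input_ids,
    # attention_mask, labels, e1_mask, e2_mask stacked along dim 0.
    ...
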
def __init__(self) -> None:
    super().__init__()
    self.optim = AdamW
    self.scheduler = ReduceLROnPlateau
    self.train_f1 = Seqeval()
    self.val_f1 = Seqeval()
    self.test_f1 = Seqeval()
    self.model = AutoModelForTokenClassification.from_pretrained(
        Const.MODEL_NAME,
        num_labels=Label("GER").count,
        return_dict=True,
        id2label=Label("GER").idx,
        label2id=Label("GER").labels,
        finetuning_task="ger",
    )
    self.tokenizer = AutoTokenizer.from_pretrained(
        Const.MODEL_NAME, add_prefix_space=True
    )
    self.tokenizer.add_special_tokens(
        {"additional_special_tokens": Const.SPECIAL_TOKENS}
    )
    # Resize the embedding matrix to cover the newly added special tokens.
    self.model.resize_token_embeddings(len(self.tokenizer))

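# `self.optim` and `self.scheduler` are stored as classes rather than
# instances, which suggests they are instantiated later, typically in
# Lightning's `configure_optimizers`. A sketch of how that might look;
# the learning rate and the monitored metric are assumptions.
def configure_optimizers(self) -> dict:
    optimizer = self.optim(self.parameters(), lr=2e-5)  # lr is an assumption
    scheduler = self.scheduler(optimizer, mode="min")
    return {
        "optimizer": optimizer,
        "lr_scheduler": {
            # ReduceLROnPlateau steps on a monitored metric, which
            # Lightning requires to be named explicitly.
            "scheduler": scheduler,
            "monitor": "val_loss",
        },
    }
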
def validation_step(self, batch: tdict, batch_idx: int) -> Tensor:
    step_out = self.step(batch, batch_idx)
    loss = step_out["loss"]
    val_f1 = f1(
        step_out["probs"],
        batch["labels"],
        num_classes=Label("REL").count,
    )
    self.log_dict(
        {"val_loss": loss, "val_f1": val_f1},
        on_step=False,
        on_epoch=True,
        prog_bar=True,
    )
    return loss

def training_step(self, batch: tdict, batch_idx: int) -> Tensor:
    step_out = self.step(batch, batch_idx)
    loss = step_out["loss"]
    train_f1 = f1(
        step_out["probs"],
        batch["labels"],
        num_classes=Label("REL").count,
    )
    self.log_dict(
        {"train_loss": loss, "train_f1": train_f1},
        on_step=True,
        on_epoch=True,
        prog_bar=True,
    )
    return loss

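# The shared `step` helper used by the training/validation/test steps above
# is not shown in this section. A plausible sketch, assuming forward()
# returns (loss, logits, ...) when labels are supplied, as the relation
# model's forward below does; the body is an assumption.
def step(self, batch: tdict, batch_idx: int) -> tdict:
    loss, logits = self.forward(**batch)[:2]
    return {"loss": loss, "probs": torch.softmax(logits, dim=-1)}
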
def forward(
    self,
    input_ids: Tensor,
    attention_mask: Tensor,
    labels: Tensor,
    e1_mask: Tensor,
    e2_mask: Tensor,
) -> tuple:
    outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
    hidden_state = outputs[0]  # (bs, seq_len, dim)
    pooled_output = hidden_state[:, 0]  # [CLS] token (bs, dim)

    # Average the hidden states over each entity span.
    e1_h = self.entity_average(hidden_state, e1_mask)
    e2_h = self.entity_average(hidden_state, e2_mask)

    # Dropout -> tanh -> fc_layer (the FC layer is shared between e1 and e2).
    pooled_output = self.cls_fc_layer(pooled_output)
    e1_h = self.entity_fc_layer(e1_h)
    e2_h = self.entity_fc_layer(e2_h)

    # Concat -> fc_layer
    concat_h = torch.cat([pooled_output, e1_h, e2_h], dim=-1)
    logits = self.label_classifier(concat_h)

    # Add hidden states and attentions if they are present.
    outputs = (logits,) + outputs[2:]

    # CrossEntropyLoss applies log-softmax internally, so raw logits go in.
    if labels is not None:
        loss_fct = nn.CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, Label("REL").count), labels.view(-1))
        outputs = (loss,) + outputs

    return outputs  # (loss), logits, (hidden_states), (attentions)

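# `entity_average` is referenced in forward() but not defined in this
# section. In the R-BERT formulation it is a masked mean over the entity
# span's hidden states; a sketch of that computation (the static-method
# form is an assumption).
@staticmethod
def entity_average(hidden_output: Tensor, e_mask: Tensor) -> Tensor:
    # hidden_output: (bs, seq_len, dim); e_mask: (bs, seq_len), 1s on the span.
    e_mask_unsqueeze = e_mask.unsqueeze(1).float()  # (bs, 1, seq_len)
    length_tensor = e_mask_unsqueeze.sum(dim=2)  # (bs, 1) span lengths
    # (bs, 1, seq_len) @ (bs, seq_len, dim) -> (bs, 1, dim) -> (bs, dim)
    sum_vector = torch.bmm(e_mask_unsqueeze, hidden_output).squeeze(1)
    return sum_vector / length_tensor  # divide by span length (broadcast)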