def __init__(self, cfg: DictConfig, trainer: Trainer = None):
    """ Initializes BERT Pretraining model. """
    if cfg.tokenizer is not None:
        self._setup_tokenizer(cfg.tokenizer)
    else:
        self.tokenizer = None

    super().__init__(cfg=cfg, trainer=trainer)

    self.bert_model = get_lm_model(
        pretrained_model_name=cfg.language_model.pretrained_model_name,
        config_file=cfg.language_model.config_file,
        config_dict=OmegaConf.to_container(cfg.language_model.config) if cfg.language_model.config else None,
        checkpoint_file=cfg.language_model.lm_checkpoint,
        vocab_file=cfg.tokenizer.get('vocab_file') if cfg.tokenizer is not None else None,
    )

    self.hidden_size = self.bert_model.config.hidden_size
    self.vocab_size = self.bert_model.config.vocab_size
    self.only_mlm_loss = cfg.only_mlm_loss

    self.mlm_classifier = BertPretrainingTokenClassifier(
        hidden_size=self.hidden_size,
        num_classes=self.vocab_size,
        num_layers=cfg.num_tok_classification_layers,
        activation='gelu',
        log_softmax=True,
        use_transformer_init=True,
    )
    self.mlm_loss = SmoothedCrossEntropyLoss()

    if not self.only_mlm_loss:
        self.nsp_classifier = SequenceClassifier(
            hidden_size=self.hidden_size,
            num_classes=2,
            num_layers=cfg.num_seq_classification_layers,
            log_softmax=False,
            activation='tanh',
            use_transformer_init=True,
        )
        self.nsp_loss = CrossEntropyLoss()
        self.agg_loss = AggregatorLoss(num_inputs=2)

    # tie the weights of the MLM softmax layer and the embedding layer of the encoder
    if self.mlm_classifier.mlp.last_linear_layer.weight.shape != self.bert_model.embeddings.word_embeddings.weight.shape:
        raise ValueError("Final classification layer does not match embedding layer.")
    self.mlm_classifier.mlp.last_linear_layer.weight = self.bert_model.embeddings.word_embeddings.weight

    # setup to track metrics
    self.validation_perplexity = Perplexity(compute_on_step=False)

    self.setup_optimization(cfg.optim)
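# --- Hedged sketch, not part of the model source ---
# A minimal, self-contained illustration (plain PyTorch, assumed stand-ins for
# the NeMo modules) of the weight tying performed above: the MLM head's output
# projection shares its parameter tensor with the encoder's word-embedding
# matrix, so gradient updates to one are updates to the other.
import torch
import torch.nn as nn

def tie_mlm_head_to_embeddings(embedding: nn.Embedding, mlm_head: nn.Linear) -> None:
    """Share a single weight tensor between embedding lookup and output projection."""
    if mlm_head.weight.shape != embedding.weight.shape:
        raise ValueError("Final classification layer does not match embedding layer.")
    mlm_head.weight = embedding.weight  # same Parameter object, not a copy

# usage: toy encoder with vocab=8, hidden=4
emb = nn.Embedding(8, 4)
head = nn.Linear(4, 8, bias=False)  # nn.Linear(4, 8) stores weight as [8, 4]
tie_mlm_head_to_embeddings(emb, head)
assert head.weight.data_ptr() == emb.weight.data_ptr()  # one shared tensor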
def __init__(self, cfg: DictConfig, trainer: Trainer = None):
    """ Initializes BERT Joint Intent and Slot model. """
    self.data_dir = cfg.data_dir
    self.max_seq_length = cfg.language_model.max_seq_length

    self.data_desc = IntentSlotDataDesc(
        data_dir=cfg.data_dir, modes=[cfg.train_ds.prefix, cfg.validation_ds.prefix]
    )

    self._setup_tokenizer(cfg.tokenizer)

    # init superclass
    super().__init__(cfg=cfg, trainer=trainer)

    # initialize Bert model
    self.bert_model = get_lm_model(
        pretrained_model_name=cfg.language_model.pretrained_model_name,
        config_file=cfg.language_model.config_file,
        config_dict=OmegaConf.to_container(cfg.language_model.config) if cfg.language_model.config else None,
        checkpoint_file=cfg.language_model.lm_checkpoint,
    )

    self.classifier = SequenceTokenClassifier(
        hidden_size=self.bert_model.config.hidden_size,
        num_intents=self.data_desc.num_intents,
        num_slots=self.data_desc.num_slots,
        dropout=cfg.head.fc_dropout,
        num_layers=cfg.head.num_output_layers,
        log_softmax=False,
    )

    # define losses
    if cfg.class_balancing == 'weighted_loss':
        # You may need to increase the number of epochs for convergence when using weighted_loss
        self.intent_loss = CrossEntropyLoss(logits_ndim=2, weight=self.data_desc.intent_weights)
        self.slot_loss = CrossEntropyLoss(logits_ndim=3, weight=self.data_desc.slot_weights)
    else:
        self.intent_loss = CrossEntropyLoss(logits_ndim=2)
        self.slot_loss = CrossEntropyLoss(logits_ndim=3)

    self.total_loss = AggregatorLoss(
        num_inputs=2, weights=[cfg.intent_loss_weight, 1.0 - cfg.intent_loss_weight]
    )

    # setup to track metrics
    self.intent_classification_report = ClassificationReport(
        self.data_desc.num_intents, self.data_desc.intents_label_ids
    )
    self.slot_classification_report = ClassificationReport(
        self.data_desc.num_slots, self.data_desc.slots_label_ids
    )

    # Optimizer setup needs to happen after all model weights are ready
    self.setup_optimization(cfg.optim)
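# --- Hedged sketch, not part of the model source ---
# A plain-PyTorch stand-in (not the NeMo AggregatorLoss API) showing what the
# total_loss above computes: a convex combination of the intent and slot
# losses, weighted by cfg.intent_loss_weight and its complement.
import torch

def aggregate_losses(intent_loss: torch.Tensor, slot_loss: torch.Tensor,
                     intent_loss_weight: float) -> torch.Tensor:
    """Weighted sum matching AggregatorLoss(num_inputs=2, weights=[w, 1 - w])."""
    return intent_loss_weight * intent_loss + (1.0 - intent_loss_weight) * slot_loss

# usage with dummy per-batch loss values and a hypothetical weight of 0.6
total = aggregate_losses(torch.tensor(0.9), torch.tensor(0.4), intent_loss_weight=0.6)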
def __init__(self, cfg: DictConfig, trainer: Trainer = None):
    """ Initializes BERT Punctuation and Capitalization model. """
    self.setup_tokenizer(cfg.tokenizer)

    super().__init__(cfg=cfg, trainer=trainer)

    self.bert_model = get_lm_model(
        pretrained_model_name=cfg.language_model.pretrained_model_name,
        config_file=self.register_artifact('language_model.config_file', cfg.language_model.config_file),
        config_dict=OmegaConf.to_container(cfg.language_model.config) if cfg.language_model.config else None,
        checkpoint_file=cfg.language_model.lm_checkpoint,
        vocab_file=self.register_artifact('tokenizer.vocab_file', cfg.tokenizer.vocab_file),
    )

    self.punct_classifier = TokenClassifier(
        hidden_size=self.bert_model.config.hidden_size,
        num_classes=len(self._cfg.punct_label_ids),
        activation=cfg.punct_head.activation,
        log_softmax=False,
        dropout=cfg.punct_head.fc_dropout,
        num_layers=cfg.punct_head.punct_num_fc_layers,
        use_transformer_init=cfg.punct_head.use_transformer_init,
    )

    self.capit_classifier = TokenClassifier(
        hidden_size=self.bert_model.config.hidden_size,
        num_classes=len(self._cfg.capit_label_ids),
        activation=cfg.capit_head.activation,
        log_softmax=False,
        dropout=cfg.capit_head.fc_dropout,
        num_layers=cfg.capit_head.capit_num_fc_layers,
        use_transformer_init=cfg.capit_head.use_transformer_init,
    )

    self.loss = CrossEntropyLoss(logits_ndim=3)
    self.agg_loss = AggregatorLoss(num_inputs=2)

    # setup to track metrics
    self.punct_class_report = ClassificationReport(
        num_classes=len(self._cfg.punct_label_ids),
        label_ids=self._cfg.punct_label_ids,
        mode='macro',
        dist_sync_on_step=True,
    )
    self.capit_class_report = ClassificationReport(
        num_classes=len(self._cfg.capit_label_ids),
        label_ids=self._cfg.capit_label_ids,
        mode='macro',
        dist_sync_on_step=True,
    )
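# --- Hedged sketch, not part of the model source ---
# Toy modules (not the NeMo TokenClassifier API) illustrating the two-head
# design built above: a single encoder output feeds two independent token-level
# heads, one predicting punctuation labels and one predicting capitalization.
import torch
import torch.nn as nn

class TwoHeadTokenTagger(nn.Module):
    def __init__(self, hidden_size: int, num_punct: int, num_capit: int):
        super().__init__()
        self.punct_head = nn.Linear(hidden_size, num_punct)
        self.capit_head = nn.Linear(hidden_size, num_capit)

    def forward(self, hidden_states: torch.Tensor):
        # hidden_states: [batch, seq_len, hidden_size] from the BERT encoder
        return self.punct_head(hidden_states), self.capit_head(hidden_states)

# usage on dummy encoder output (hypothetical sizes: 4 punct labels, 2 capit labels)
tagger = TwoHeadTokenTagger(hidden_size=768, num_punct=4, num_capit=2)
punct_logits, capit_logits = tagger(torch.randn(2, 16, 768))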
def _reconfigure_classifier(self) -> None:
    """ Reconfigures the classifier and losses to match the label sets in model cfg.data_desc. """
    self.classifier = SequenceTokenClassifier(
        hidden_size=self.bert_model.config.hidden_size,
        num_intents=len(self.cfg.data_desc.intent_labels),
        num_slots=len(self.cfg.data_desc.slot_labels),
        dropout=self.cfg.head.fc_dropout,
        num_layers=self.cfg.head.num_output_layers,
        log_softmax=False,
    )

    # define losses
    if self.cfg.class_balancing == 'weighted_loss':
        # You may need to increase the number of epochs for convergence when using weighted_loss
        self.intent_loss = BCEWithLogitsLoss(logits_ndim=2, pos_weight=self.cfg.data_desc.intent_weights)
        self.slot_loss = CrossEntropyLoss(logits_ndim=3, weight=self.cfg.data_desc.slot_weights)
    else:
        self.intent_loss = BCEWithLogitsLoss(logits_ndim=2)
        self.slot_loss = CrossEntropyLoss(logits_ndim=3)

    self.total_loss = AggregatorLoss(
        num_inputs=2, weights=[self.cfg.intent_loss_weight, 1.0 - self.cfg.intent_loss_weight]
    )

    # setup to track metrics
    self.intent_classification_report = MultiLabelClassificationReport(
        num_classes=len(self.cfg.data_desc.intent_labels),
        label_ids=self.cfg.data_desc.intent_label_ids,
        dist_sync_on_step=True,
        mode='micro',
    )
    self.slot_classification_report = ClassificationReport(
        num_classes=len(self.cfg.data_desc.slot_labels),
        label_ids=self.cfg.data_desc.slot_label_ids,
        dist_sync_on_step=True,
        mode='micro',
    )
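# --- Hedged sketch, not part of the model source ---
# Plain torch.nn losses (not the NeMo wrappers) showing why the multi-label
# variant above swaps CrossEntropyLoss for BCEWithLogitsLoss on intents: BCE
# treats each intent as an independent yes/no decision, so one utterance may
# carry several intents at once, while cross-entropy assumes exactly one.
import torch
import torch.nn as nn

logits = torch.randn(2, 5)                       # [batch, num_intents]
multi_hot_targets = torch.tensor([[1., 0., 1., 0., 0.],   # two intents active
                                  [0., 0., 0., 1., 0.]])  # one intent active
bce = nn.BCEWithLogitsLoss()                     # multi-label: per-intent sigmoid
multi_label_loss = bce(logits, multi_hot_targets)

single_label_targets = torch.tensor([2, 3])      # exactly one class per example
ce = nn.CrossEntropyLoss()                       # single-label: softmax over intents
single_label_loss = ce(logits, single_label_targets)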