def _set_data_desc_to_cfg(self, cfg, data_dir, train_ds, validation_ds):
    """Creates an IntentSlotDataDesc and copies its generated values into cfg.data_desc.

    Also writes the intent/slot label dictionaries to CSV files in ``data_dir``
    and registers them as model artifacts so labels can be restored at inference.

    Args:
        cfg: model configuration (DictConfig), updated in place.
        data_dir: directory containing the dataset files.
        train_ds: training dataset config; only its ``prefix`` is used.
        validation_ds: validation dataset config; only its ``prefix`` is used.
    """
    # Save data from data desc to config - so it can be reused later, e.g. in inference.
    data_desc = IntentSlotDataDesc(data_dir=data_dir, modes=[train_ds.prefix, validation_ds.prefix])
    # Temporarily unlock the (possibly struct) config so new keys can be added.
    OmegaConf.set_struct(cfg, False)
    if not hasattr(cfg, "data_desc") or cfg.data_desc is None:
        cfg.data_desc = {}

    # Intents.
    cfg.data_desc.intent_labels = list(data_desc.intents_label_ids.keys())
    cfg.data_desc.intent_label_ids = data_desc.intents_label_ids
    cfg.data_desc.intent_weights = data_desc.intent_weights

    # Slots.
    cfg.data_desc.slot_labels = list(data_desc.slots_label_ids.keys())
    cfg.data_desc.slot_label_ids = data_desc.slots_label_ids
    cfg.data_desc.slot_weights = data_desc.slot_weights

    cfg.data_desc.pad_label = data_desc.pad_label

    # For older (pre-1.0.0b3) configs compatibility: provide default label-file names.
    if not hasattr(cfg, "class_labels") or cfg.class_labels is None:
        # NOTE(review): the original assigned `cfg.class_labels = {}` here and then
        # immediately overwrote it with the OmegaConf.create below; the dead
        # assignment has been removed — behavior is unchanged.
        cfg.class_labels = OmegaConf.create(
            {'intent_labels_file': 'intent_labels.csv', 'slot_labels_file': 'slot_labels.csv'}
        )

    # Persist the label dictionaries next to the data and register them as artifacts.
    slot_labels_file = os.path.join(data_dir, cfg.class_labels.slot_labels_file)
    intent_labels_file = os.path.join(data_dir, cfg.class_labels.intent_labels_file)
    self._save_label_ids(data_desc.slots_label_ids, slot_labels_file)
    self._save_label_ids(data_desc.intents_label_ids, intent_labels_file)

    self.register_artifact(cfg.class_labels.intent_labels_file, intent_labels_file)
    self.register_artifact(cfg.class_labels.slot_labels_file, slot_labels_file)

    # Re-lock the config.
    OmegaConf.set_struct(cfg, True)
def __init__(self, cfg: DictConfig, trainer: Trainer = None):
    """Initializes the BERT Joint Intent and Slot model.

    Builds, in order: the data descriptor, the tokenizer, the superclass,
    the BERT encoder, the joint intent/slot classification head, the
    (optionally class-weighted) losses, the per-task metrics, and the
    optimizer. The order is load-bearing; see inline notes.

    Args:
        cfg: model configuration (data paths, language model, head, losses, optim).
        trainer: PyTorch Lightning Trainer, if available.
    """
    self.data_dir = cfg.data_dir
    self.max_seq_length = cfg.language_model.max_seq_length
    # Data descriptor supplies label dictionaries and class weights; it must
    # exist before the classifier head below (num_intents / num_slots).
    self.data_desc = IntentSlotDataDesc(
        data_dir=cfg.data_dir, modes=[cfg.train_ds.prefix, cfg.validation_ds.prefix]
    )
    # Tokenizer is set up before the superclass init — presumably the base
    # class init depends on it. NOTE(review): confirm against the base class.
    self._setup_tokenizer(cfg.tokenizer)

    # init superclass
    super().__init__(cfg=cfg, trainer=trainer)

    # initialize Bert model
    self.bert_model = get_lm_model(
        pretrained_model_name=cfg.language_model.pretrained_model_name,
        config_file=cfg.language_model.config_file,
        config_dict=OmegaConf.to_container(cfg.language_model.config) if cfg.language_model.config else None,
        checkpoint_file=cfg.language_model.lm_checkpoint,
    )

    # Joint head producing both sequence-level (intent) and token-level (slot)
    # logits from the encoder's hidden states.
    self.classifier = SequenceTokenClassifier(
        hidden_size=self.bert_model.config.hidden_size,
        num_intents=self.data_desc.num_intents,
        num_slots=self.data_desc.num_slots,
        dropout=cfg.head.fc_dropout,
        num_layers=cfg.head.num_output_layers,
        log_softmax=False,
    )

    # define losses
    if cfg.class_balancing == 'weighted_loss':
        # You may need to increase the number of epochs for convergence when using weighted_loss
        self.intent_loss = CrossEntropyLoss(logits_ndim=2, weight=self.data_desc.intent_weights)
        self.slot_loss = CrossEntropyLoss(logits_ndim=3, weight=self.data_desc.slot_weights)
    else:
        self.intent_loss = CrossEntropyLoss(logits_ndim=2)
        self.slot_loss = CrossEntropyLoss(logits_ndim=3)

    # Total loss is a convex combination of the two task losses, weighted by
    # cfg.intent_loss_weight.
    self.total_loss = AggregatorLoss(
        num_inputs=2, weights=[cfg.intent_loss_weight, 1.0 - cfg.intent_loss_weight]
    )

    # setup to track metrics
    self.intent_classification_report = ClassificationReport(
        self.data_desc.num_intents, self.data_desc.intents_label_ids
    )
    self.slot_classification_report = ClassificationReport(
        self.data_desc.num_slots, self.data_desc.slots_label_ids
    )

    # Optimizer setup needs to happen after all model weights are ready
    self.setup_optimization(cfg.optim)
def _set_data_desc_to_cfg(self, cfg, data_dir, train_ds, validation_ds):
    """Builds an IntentSlotDataDesc and records its label/weight values under cfg.data_desc.

    Storing the values in the config lets them be reused later, e.g. at
    inference time when the original data files may be absent.
    """
    desc = IntentSlotDataDesc(data_dir=data_dir, modes=[train_ds.prefix, validation_ds.prefix])

    # Unlock the config so new keys can be added; re-lock once done.
    OmegaConf.set_struct(cfg, False)
    if getattr(cfg, "data_desc", None) is None:
        cfg.data_desc = {}

    # Intent-side values.
    cfg.data_desc.intent_labels = list(desc.intents_label_ids.keys())
    cfg.data_desc.intent_label_ids = desc.intents_label_ids
    cfg.data_desc.intent_weights = desc.intent_weights

    # Slot-side values.
    cfg.data_desc.slot_labels = list(desc.slots_label_ids.keys())
    cfg.data_desc.slot_label_ids = desc.slots_label_ids
    cfg.data_desc.slot_weights = desc.slot_weights

    cfg.data_desc.pad_label = desc.pad_label

    OmegaConf.set_struct(cfg, True)
def predict_from_examples(self, queries: List[str], batch_size: int = 32) -> List[List[str]]:
    """
    Get predictions (intent and slots) for the given queries.

    Args:
        queries: text sequences to classify.
        batch_size: batch size to use during inference.

    Returns:
        predicted_intents, predicted_slots: a 2-tuple with one intent label per
        query and one space-separated string of slot labels per query.
        NOTE(review): the declared return annotation ``List[List[str]]`` does
        not match the actual 2-tuple return; kept unchanged for interface
        stability — confirm and correct alongside callers.
    """
    predicted_intents = []
    predicted_slots = []
    mode = self.training
    try:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # Switch model to evaluation mode
        self.eval()
        self.to(device)

        infer_datalayer = self._setup_infer_dataloader(queries, batch_size)

        # load intent and slot labels from the dictionary files (user should have them in a data directory)
        intent_labels, slot_labels = IntentSlotDataDesc.intent_slot_dicts(self.data_dir)

        for batch in infer_datalayer:
            input_ids, input_type_ids, input_mask, loss_mask, subtokens_mask = batch

            intent_logits, slot_logits = self.forward(
                input_ids=input_ids.to(device),
                token_type_ids=input_type_ids.to(device),
                attention_mask=input_mask.to(device),
            )

            # Intents: argmax over classes, then map numeric ids to label strings.
            # (`dim=` is the torch-native keyword; the original used the NumPy
            # compatibility alias `axis=`.)
            intent_preds = tensor2list(torch.argmax(intent_logits, dim=-1))
            for intent_num in intent_preds:
                if intent_num < len(intent_labels):
                    predicted_intents.append(intent_labels[intent_num])
                else:
                    # should not happen
                    predicted_intents.append("Unknown Intent")

            # Slots: per-token argmax; keep only positions where subtokens_mask
            # is set (first subtoken of each word).
            slot_preds = torch.argmax(slot_logits, dim=-1)
            for slot_preds_query, mask_query in zip(slot_preds, subtokens_mask):
                # Collect labels and join once instead of quadratic `+=` on a
                # string; result is identical to the original's strip()-ed form.
                query_slots = [
                    slot_labels[slot] if slot < len(slot_labels) else 'Unknown_slot'
                    for slot, mask in zip(slot_preds_query, mask_query)
                    if mask == 1
                ]
                predicted_slots.append(' '.join(query_slots))
    finally:
        # set mode back to its original value
        self.train(mode=mode)

    return predicted_intents, predicted_slots