pretrained_model_name=args.pretrained_bert_model, factory=nf) hidden_size = pretrained_bert_model.local_parameters["hidden_size"] tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model) data_desc = JointIntentSlotDataDesc(args.data_dir, args.do_lower_case, args.dataset_name, args.none_slot_label, args.pad_label) # Create sentence classification loss on top classifier = nemo_nlp.JointIntentSlotClassifier( hidden_size=hidden_size, num_intents=data_desc.num_intents, num_slots=data_desc.num_slots, dropout=args.fc_dropout) loss_fn = nemo_nlp.JointIntentSlotLoss(num_slots=data_desc.num_slots) def create_pipeline(num_samples=-1, batch_size=32, num_gpus=1, local_rank=0, mode='train'): nf.logger.info(f"Loading {mode} data...") data_file = f'{data_desc.data_dir}/{mode}.tsv' slot_file = f'{data_desc.data_dir}/{mode}_slots.tsv' shuffle = args.shuffle_data if mode == 'train' else False data_layer = nemo_nlp.BertJointIntentSlotDataLayer( input_file=data_file, slot_file=slot_file,
args.pad_label) # Create sentence classification loss on top classifier = nemo_nlp.JointIntentSlotClassifier( hidden_size=hidden_size, num_intents=data_desc.num_intents, num_slots=data_desc.num_slots, dropout=args.fc_dropout) if args.class_balancing == 'weighted_loss': # Using weighted loss will enable weighted loss for both intents and slots # Use the intent_loss_weight hyperparameter to adjust intent loss to # prevent overfitting or underfitting. loss_fn = nemo_nlp.JointIntentSlotLoss( num_slots=data_desc.num_slots, slot_classes_loss_weights=data_desc.slot_weights, intent_classes_loss_weights=data_desc.intent_weights, intent_loss_weight=args.intent_loss_weight) else: loss_fn = nemo_nlp.JointIntentSlotLoss(num_slots=data_desc.num_slots) def create_pipeline(num_samples=-1, batch_size=32, num_gpus=1, local_rank=0, mode='train'): nemo.logging.info(f"Loading {mode} data...") data_file = f'{data_desc.data_dir}/{mode}.tsv' slot_file = f'{data_desc.data_dir}/{mode}_slots.tsv' shuffle = args.shuffle_data if mode == 'train' else False
num_slots = 12 pad_label = num_slots - 1 else: nf.logger.info("Looks like you pass in the name of dataset that isn't " "already supported by NeMo. Please make sure that you " "build the preprocessing method for it.") # Create sentence classification loss on top hidden_size = pretrained_bert_model.local_parameters["hidden_size"] classifier = nemo_nlp.JointIntentSlotClassifier(hidden_size=hidden_size, num_intents=num_intents, num_slots=num_slots, dropout=args.fc_dropout) loss_fn = nemo_nlp.JointIntentSlotLoss(num_slots=num_slots) tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model) def create_pipeline(data_file, slot_file, max_seq_length, batch_size=32, num_samples=-1, shuffle=True, num_gpus=1, local_rank=0, mode='train'): nf.logger.info(f"Loading {mode} data...") data_layer = nemo_nlp.BertJointIntentSlotDataLayer(