Ejemplo n.º 1
0
    pretrained_model_name=args.pretrained_bert_model, factory=nf)
# Read the "hidden_size" entry from the pretrained model's local parameters;
# it is passed to the classifier below.
hidden_size = pretrained_bert_model.local_parameters["hidden_size"]
# Tokenizer loaded for the model named by args.pretrained_bert_model.
tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model)

# Dataset descriptor built from the command-line arguments. Its num_intents
# and num_slots attributes configure the classifier and the loss below.
data_desc = JointIntentSlotDataDesc(args.data_dir, args.do_lower_case,
                                    args.dataset_name, args.none_slot_label,
                                    args.pad_label)

# Create the joint intent/slot classifier, sized with the pretrained model's
# hidden size and the label counts reported by data_desc.
classifier = nemo_nlp.JointIntentSlotClassifier(
    hidden_size=hidden_size,
    num_intents=data_desc.num_intents,
    num_slots=data_desc.num_slots,
    dropout=args.fc_dropout)

# Joint intent/slot loss, constructed with only the slot count (no class
# weights are passed here).
loss_fn = nemo_nlp.JointIntentSlotLoss(num_slots=data_desc.num_slots)


def create_pipeline(num_samples=-1,
                    batch_size=32,
                    num_gpus=1,
                    local_rank=0,
                    mode='train'):
    nf.logger.info(f"Loading {mode} data...")
    data_file = f'{data_desc.data_dir}/{mode}.tsv'
    slot_file = f'{data_desc.data_dir}/{mode}_slots.tsv'
    shuffle = args.shuffle_data if mode == 'train' else False

    data_layer = nemo_nlp.BertJointIntentSlotDataLayer(
        input_file=data_file,
        slot_file=slot_file,
Ejemplo n.º 2
0
                                    args.pad_label)

# Create the joint intent/slot classifier. hidden_size is defined earlier in
# the script; the label counts come from data_desc.
classifier = nemo_nlp.JointIntentSlotClassifier(
    hidden_size=hidden_size,
    num_intents=data_desc.num_intents,
    num_slots=data_desc.num_slots,
    dropout=args.fc_dropout)

# Choose the loss according to --class_balancing.
if args.class_balancing == 'weighted_loss':
    # Weighted loss applies class weights to both the intent and the slot
    # terms, using the weights exposed by data_desc.
    # The intent_loss_weight hyperparameter adjusts the intent loss and can
    # be tuned to prevent overfitting or underfitting.
    loss_fn = nemo_nlp.JointIntentSlotLoss(
      num_slots=data_desc.num_slots,
      slot_classes_loss_weights=data_desc.slot_weights,
      intent_classes_loss_weights=data_desc.intent_weights,
      intent_loss_weight=args.intent_loss_weight)
else:
    # Any other class_balancing value: loss built with only the slot count,
    # without class weights or an explicit intent_loss_weight.
    loss_fn = nemo_nlp.JointIntentSlotLoss(num_slots=data_desc.num_slots)


def create_pipeline(num_samples=-1,
                    batch_size=32,
                    num_gpus=1,
                    local_rank=0,
                    mode='train'):
    nemo.logging.info(f"Loading {mode} data...")
    data_file = f'{data_desc.data_dir}/{mode}.tsv'
    slot_file = f'{data_desc.data_dir}/{mode}_slots.tsv'
    shuffle = args.shuffle_data if mode == 'train' else False
Ejemplo n.º 3
0
        num_slots = 12
    pad_label = num_slots - 1
else:
    nf.logger.info("Looks like you pass in the name of dataset that isn't "
                   "already supported by NeMo. Please make sure that you "
                   "build the preprocessing method for it.")

# Read the "hidden_size" entry from the pretrained model's local parameters;
# it is passed to the classifier below.
hidden_size = pretrained_bert_model.local_parameters["hidden_size"]

# Create the joint intent/slot classifier. num_intents and num_slots are set
# by the dataset-specific code earlier in the script.
classifier = nemo_nlp.JointIntentSlotClassifier(hidden_size=hidden_size,
                                                num_intents=num_intents,
                                                num_slots=num_slots,
                                                dropout=args.fc_dropout)

# Joint intent/slot loss, constructed with only the slot count.
loss_fn = nemo_nlp.JointIntentSlotLoss(num_slots=num_slots)

# Tokenizer loaded for the model named by args.pretrained_bert_model.
tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model)


def create_pipeline(data_file,
                    slot_file,
                    max_seq_length,
                    batch_size=32,
                    num_samples=-1,
                    shuffle=True,
                    num_gpus=1,
                    local_rank=0,
                    mode='train'):
    nf.logger.info(f"Loading {mode} data...")
    data_layer = nemo_nlp.BertJointIntentSlotDataLayer(