Code Example #1
def create_pipeline(num_samples=-1,
                    batch_size=32,
                    num_gpus=1,
                    local_rank=0,
                    mode='train'):
    nemo.logging.info(f"Loading {mode} data...")
    data_file = f'{data_desc.data_dir}/{mode}.tsv'
    slot_file = f'{data_desc.data_dir}/{mode}_slots.tsv'
    shuffle = args.shuffle_data if mode == 'train' else False

    data_layer = nemo_nlp.BertJointIntentSlotDataLayer(
        input_file=data_file,
        slot_file=slot_file,
        pad_label=data_desc.pad_label,
        tokenizer=tokenizer,
        max_seq_length=args.max_seq_length,
        num_samples=num_samples,
        shuffle=shuffle,
        batch_size=batch_size,
        num_workers=0,
        local_rank=local_rank,
        ignore_extra_tokens=args.ignore_extra_tokens,
        ignore_start_end=args.ignore_start_end)

    ids, type_ids, input_mask, loss_mask, \
        subtokens_mask, intents, slots = data_layer()
    data_size = len(data_layer)

    nemo.logging.info(f'The length of data layer is {data_size}')

    if data_size < batch_size:
        nemo.logging.warning("batch_size is larger than the dataset size")
        nemo.logging.warning("Reducing batch_size to the dataset size")
        # Note: the data layer above was already constructed with the original
        # batch_size; the reduced value only affects steps_per_epoch below.
        batch_size = data_size

    steps_per_epoch = math.ceil(data_size / (batch_size * num_gpus))
    nemo.logging.info(f"Steps_per_epoch = {steps_per_epoch}")

    hidden_states = pretrained_bert_model(input_ids=ids,
                                          token_type_ids=type_ids,
                                          attention_mask=input_mask)

    intent_logits, slot_logits = classifier(hidden_states=hidden_states)

    loss = loss_fn(intent_logits=intent_logits,
                   slot_logits=slot_logits,
                   loss_mask=loss_mask,
                   intents=intents,
                   slots=slots)

    if mode == 'train':
        tensors_to_evaluate = [loss, intent_logits, slot_logits]
    else:
        tensors_to_evaluate = [intent_logits, slot_logits, intents,
                               slots, subtokens_mask]

    return tensors_to_evaluate, loss, steps_per_epoch, data_layer
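
A plausible way to call this function, once per data split, is sketched below. Flag names on `args` that do not appear above (such as `num_train_samples` and `num_gpus`) are assumptions for illustration.

# Hypothetical usage; some args flags are assumed, not from the source.
train_tensors, train_loss, train_steps, train_data_layer = create_pipeline(
    num_samples=args.num_train_samples,
    batch_size=args.batch_size,
    num_gpus=args.num_gpus,
    local_rank=args.local_rank,
    mode='train')

eval_tensors, eval_loss, eval_steps, eval_data_layer = create_pipeline(
    batch_size=args.batch_size,
    num_gpus=args.num_gpus,
    local_rank=args.local_rank,
    mode=args.eval_file_prefix)  # e.g. 'test'; selects {mode}.tsv and {mode}_slots.tsv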
Code Example #2
def create_pipeline(dataset,
                    batch_size=32,
                    num_gpus=1,
                    local_rank=0,
                    mode='train'):
    data_layer = nemo_nlp.BertJointIntentSlotDataLayer(dataset,
                                                       batch_size=batch_size,
                                                       num_workers=0,
                                                       local_rank=local_rank)

    ids, type_ids, input_mask, slot_mask, intents, slots = data_layer()
    data_size = len(data_layer)

    if data_size < batch_size:
        nf.logger.warning("Batch_size is larger than the dataset size")
        nf.logger.warning("Reducing batch_size to dataset size")
        batch_size = data_size

    steps_per_epoch = math.ceil(data_size / (batch_size * num_gpus))
    nf.logger.info(f"Steps_per_epoch = {steps_per_epoch}")

    hidden_states = pretrained_bert_model(input_ids=ids,
                                          token_type_ids=type_ids,
                                          attention_mask=input_mask)

    intent_logits, slot_logits = classifier(hidden_states=hidden_states)

    loss = loss_fn(intent_logits=intent_logits,
                   slot_logits=slot_logits,
                   input_mask=input_mask,
                   intents=intents,
                   slots=slots)

    if mode == 'train':
        tensors_to_evaluate = [loss, intent_logits, slot_logits]
    else:
        tensors_to_evaluate = [intent_logits, slot_logits, intents, slots]

    return tensors_to_evaluate, loss, steps_per_epoch, data_layer
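
The returned loss and steps_per_epoch plug straight into the training action. Below is a minimal sketch under NeMo 0.x conventions; the optimizer choice and hyperparameters are illustrative assumptions, not values from the source.

# Sketch only: optimizer and hyperparameters are assumed.
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss],
    print_func=lambda x: nf.logger.info(f'Loss: {x[0].item():.3f}'),
    step_freq=100)

nf.train(tensors_to_optimize=[loss],
         callbacks=[train_callback],
         optimizer='adam_w',
         optimization_params={'num_epochs': 10, 'lr': 2e-5})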
Code Example #3
"""
pretrained_bert_model = nemo_nlp.huggingface.BERT(
    pretrained_model_name=args.pretrained_bert_model)
hidden_size = pretrained_bert_model.local_parameters["hidden_size"]
tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model)

data_desc = JointIntentSlotDataDesc(args.data_dir, args.do_lower_case,
                                    args.dataset_name)

# Evaluation pipeline
nf.logger.info("Loading eval data...")
data_layer = nemo_nlp.BertJointIntentSlotDataLayer(
    input_file=f'{data_desc.data_dir}/{args.eval_file_prefix}.tsv',
    slot_file=f'{data_desc.data_dir}/{args.eval_file_prefix}_slots.tsv',
    pad_label=data_desc.pad_label,
    tokenizer=tokenizer,
    max_seq_length=args.max_seq_length,
    shuffle=False,
    batch_size=args.batch_size,
    num_workers=0,
    local_rank=args.local_rank)

classifier = nemo_nlp.JointIntentSlotClassifier(
    hidden_size=hidden_size,
    num_intents=data_desc.num_intents,
    num_slots=data_desc.num_slots)

ids, type_ids, \
    input_mask, loss_mask, subtokens_mask, \
    intents, slots = data_layer()

hidden_states = pretrained_bert_model(input_ids=ids,
                                      token_type_ids=type_ids,
                                      attention_mask=input_mask)
Code Example #4
def create_pipeline(data_file,
                    slot_file,
                    max_seq_length,
                    batch_size=32,
                    num_samples=-1,
                    shuffle=True,
                    num_gpus=1,
                    local_rank=0,
                    mode='train'):
    nf.logger.info(f"Loading {mode} data...")
    data_layer = nemo_nlp.BertJointIntentSlotDataLayer(
        path_to_data=data_file,
        path_to_slot=slot_file,
        pad_label=pad_label,
        tokenizer=tokenizer,
        mode=mode,
        max_seq_length=max_seq_length,
        num_samples=num_samples,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=0,
        local_rank=local_rank)

    ids, type_ids, input_mask, slot_mask, intents, slots = data_layer()
    data_size = len(data_layer)

    if data_size < batch_size:
        nf.logger.warning("Batch_size is larger than the dataset size")
        nf.logger.warning("Reducing batch_size to dataset size")
        batch_size = data_size

    # Use ceil (as in the other variants) so the final partial batch counts.
    steps_per_epoch = math.ceil(data_size / (batch_size * num_gpus))

    nf.logger.info(f"Steps_per_epoch = {steps_per_epoch}")

    hidden_states = pretrained_bert_model(input_ids=ids,
                                          token_type_ids=type_ids,
                                          attention_mask=input_mask)

    intent_logits, slot_logits = classifier(hidden_states=hidden_states)

    loss = loss_fn(intent_logits=intent_logits,
                   slot_logits=slot_logits,
                   input_mask=input_mask,
                   intents=intents,
                   slots=slots)

    # Create trainer and execute training action
    if mode == 'train':
        callback_fn = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss, intent_logits, slot_logits],
            print_func=lambda x: str(np.round(x[0].item(), 3)),
            tb_writer=nf.tb_writer,
            get_tb_values=lambda x: [["loss", x[0]]],
            step_freq=100)
    elif mode == 'eval':
        callback_fn = nemo.core.EvaluatorCallback(
            eval_tensors=[intent_logits, slot_logits, intents, slots],
            user_iter_callback=lambda x, y: eval_iter_callback(
                x, y, data_layer),
            user_epochs_done_callback=lambda x: eval_epochs_done_callback(
                x, f'{nf.work_dir}/graphs'),
            tb_writer=nf.tb_writer,
            eval_step=steps_per_epoch)
    else:
        # Guard against an unbound callback_fn at the return below.
        raise ValueError(f"Unknown mode '{mode}'; expected 'train' or 'eval'")

    return loss, callback_fn, steps_per_epoch
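
Because this variant also returns the matching callback, a full run could be wired roughly as follows. The file names mirror those in the other examples, while `args.num_epochs` and `args.lr` are assumed flags.

# Hypothetical wiring of the train and eval pipelines; names are assumed.
train_loss, train_callback, train_steps = create_pipeline(
    f'{data_dir}/train.tsv', f'{data_dir}/train_slots.tsv',
    args.max_seq_length, batch_size=args.batch_size, mode='train')

_, eval_callback, _ = create_pipeline(
    f'{data_dir}/test.tsv', f'{data_dir}/test_slots.tsv',
    args.max_seq_length, batch_size=args.batch_size,
    shuffle=False, mode='eval')

nf.train(tensors_to_optimize=[train_loss],
         callbacks=[train_callback, eval_callback],
         optimizer='adam_w',
         optimization_params={'num_epochs': args.num_epochs, 'lr': args.lr})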
Code Example #5
    tokenizer=tokenizer,
    max_seq_length=args.max_seq_length,
    shuffle=False)

classifier = nemo_nlp.JointIntentSlotClassifier(
    hidden_size=hidden_size,
    num_intents=data_desc.num_intents,
    num_slots=data_desc.num_slots,
    dropout=args.fc_dropout)

loss_fn = nemo_nlp.JointIntentSlotLoss(num_slots=data_desc.num_slots)

# Evaluation pipeline
nf.logger.info("Loading eval data...")
data_layer = nemo_nlp.BertJointIntentSlotDataLayer(dataset,
                                                   batch_size=args.batch_size,
                                                   num_workers=0,
                                                   local_rank=args.local_rank)

ids, type_ids, input_mask, slot_mask, intents, slots = data_layer()

hidden_states = pretrained_bert_model(input_ids=ids,
                                      token_type_ids=type_ids,
                                      attention_mask=input_mask)

intent_logits, slot_logits = classifier(hidden_states=hidden_states)

###########################################################################

# Run the `infer` action to materialize the evaluation tensors
evaluated_tensors = nf.infer(
    tensors=[intent_logits, slot_logits, slot_mask, intents, slots])
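
nf.infer returns, for each requested tensor, a list of per-batch results. A minimal post-processing sketch (the helper below is ours, not part of the library) that reduces the logits to predictions:

import numpy as np

def concat_batches(batches):
    # Concatenate the per-batch tensors returned by nf.infer into one array.
    return np.concatenate([t.cpu().numpy() for t in batches], axis=0)

intent_preds = np.argmax(concat_batches(evaluated_tensors[0]), axis=-1)
slot_preds = np.argmax(concat_batches(evaluated_tensors[1]), axis=-1)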
Code Example #6
classifier = nemo_nlp.JointIntentSlotClassifier(hidden_size=hidden_size,
                                                num_intents=num_intents,
                                                num_slots=num_slots,
                                                dropout=args.fc_dropout)

loss_fn = nemo_nlp.JointIntentSlotLoss(num_slots=num_slots)

# Evaluation pipeline
logger.info("Loading eval data...")
data_layer = nemo_nlp.BertJointIntentSlotDataLayer(
    path_to_data=data_dir + '/test.tsv',
    path_to_slot=data_dir + '/test_slots.tsv',
    pad_label=num_slots - 1,
    tokenizer=tokenizer,
    mode='eval',
    max_seq_length=args.max_seq_length,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=0,
    local_rank=args.local_rank)

ids, type_ids, input_mask, slot_mask, intents, slots = data_layer()

hidden_states = pretrained_bert_model(input_ids=ids,
                                      token_type_ids=type_ids,
                                      attention_mask=input_mask)

intent_logits, slot_logits = classifier(hidden_states=hidden_states)

###########################################################################
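
To score this pipeline, one could run the `infer` action on the tensors above and compare predictions with the references, masking padded positions via `slot_mask`. A rough sketch, assuming the NeuralModuleFactory instance is named `nf` as in the other examples:

import numpy as np

results = nf.infer(
    tensors=[intent_logits, slot_logits, slot_mask, intents, slots])

def flat(batches):
    # Stack per-batch tensors from nf.infer into a single numpy array.
    return np.concatenate([t.cpu().numpy() for t in batches], axis=0)

intent_pred = np.argmax(flat(results[0]), axis=-1)
slot_pred = np.argmax(flat(results[1]), axis=-1)
mask = flat(results[2]) > 0.5  # keep real (non-padding) subtokens only

intent_acc = float((intent_pred == flat(results[3])).mean())
slot_acc = float((slot_pred == flat(results[4]))[mask].mean())
logger.info(f'Intent accuracy: {intent_acc:.4f}, slot accuracy: {slot_acc:.4f}')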