Beispiel #1
0
        model = nemo_nlp.huggingface.BERT(**config)
    else:
        model = nemo_nlp.huggingface.BERT(
            pretrained_model_name=args.pretrained_bert_model)

    model.restore_from(args.bert_checkpoint)

hidden_size = model.local_parameters["hidden_size"]

# uses [CLS] token for classification (the first token)
if args.task_name == 'sts-b':
    pooler = nemo_nlp.SequenceRegression(hidden_size=hidden_size)
    glue_loss = MSELoss()
else:
    pooler = nemo_nlp.SequenceClassifier(hidden_size=hidden_size,
                                         num_classes=num_labels,
                                         log_softmax=False)
    glue_loss = CrossEntropyLoss()


def create_pipeline(max_seq_length=args.max_seq_length,
                    batch_size=args.batch_size,
                    local_rank=args.local_rank,
                    num_gpus=args.num_gpus,
                    evaluate=False,
                    processor=task_processors[0]):

    data_layer = 'GlueDataLayerClassification'
    if output_mode == 'regression':
        data_layer = 'GlueDataLayerRegression'
Beispiel #2
0
                                       d_model=args.d_model,
                                       num_heads=args.num_heads,
                                       d_inner=args.d_inner,
                                       max_seq_length=args.max_seq_length,
                                       hidden_act="gelu")
""" create necessary modules for the whole translation pipeline, namely
data layers, BERT encoder, and MLM and NSP loss functions
"""
mlm_classifier = nemo_nlp.TokenClassifier(args.d_model,
                                          num_classes=tokenizer.vocab_size,
                                          num_layers=1,
                                          log_softmax=True)
mlm_loss_fn = nemo_nlp.MaskedLanguageModelingLossNM()

nsp_classifier = nemo_nlp.SequenceClassifier(args.d_model,
                                             num_classes=2,
                                             num_layers=2,
                                             log_softmax=True)
nsp_loss_fn = nemo.backends.pytorch.common.CrossEntropyLoss()

bert_loss = nemo_nlp.LossAggregatorNM(num_inputs=2)

# tie weights of MLM softmax layer and embedding layer of the encoder
mlm_classifier.mlp.last_linear_layer.weight = \
    bert_model.bert.embeddings.word_embeddings.weight


def create_pipeline(data_file, max_seq_length, mask_probability,
                    short_seq_prob, batch_size):
    data_layer = nemo_nlp.BertPretrainingDataLayer(tokenizer,
                                                   data_file,
                                                   max_seq_length,
Beispiel #3
0
""" Load the pretrained BERT parameters
See the list of pretrained models, call:
nemo_nlp.huggingface.BERT.list_pretrained_models()
"""
pretrained_bert_model = nemo_nlp.huggingface.BERT(
    pretrained_model_name=args.pretrained_bert_model)
hidden_size = pretrained_bert_model.local_parameters["hidden_size"]
tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model)

data_desc = SentenceClassificationDataDesc(
    args.dataset_name, args.data_dir, args.do_lower_case)

# Create sentence classification loss on top
classifier = nemo_nlp.SequenceClassifier(hidden_size=hidden_size,
                                         num_classes=data_desc.num_labels,
                                         dropout=args.fc_dropout)

loss_fn = nemo.backends.pytorch.common.CrossEntropyLoss()


def create_pipeline(num_samples=-1,
                    batch_size=32,
                    num_gpus=1,
                    local_rank=0,
                    mode='train'):
    nf.logger.info(f"Loading {mode} data...")
    data_file = f'{data_desc.data_dir}/{mode}.tsv'
    shuffle = args.shuffle_data if mode == 'train' else False

    data_layer = nemo_nlp.BertSentenceClassificationDataLayer(
Beispiel #4
0
if args.bert_checkpoint is not None:
    bert_model.restore_from(args.bert_checkpoint)
""" create necessary modules for the whole translation pipeline, namely
data layers, BERT encoder, and MLM and NSP loss functions
"""

mlm_classifier = nemo_nlp.BertTokenClassifier(args.hidden_size,
                                              num_classes=args.vocab_size,
                                              activation=args.hidden_act,
                                              log_softmax=True)
mlm_loss_fn = nemo_nlp.MaskedLanguageModelingLossNM()
if not args.only_mlm_loss:
    nsp_classifier = nemo_nlp.SequenceClassifier(args.hidden_size,
                                                 num_classes=2,
                                                 num_layers=2,
                                                 activation='tanh',
                                                 log_softmax=False)
    nsp_loss_fn = nemo.backends.pytorch.common.CrossEntropyLoss()

    bert_loss = nemo_nlp.LossAggregatorNM(num_inputs=2)

# tie weights of MLM softmax layer and embedding layer of the encoder
if (mlm_classifier.mlp.last_linear_layer.weight.shape !=
        bert_model.bert.embeddings.word_embeddings.weight.shape):
    raise ValueError("Final classification layer does not match embedding "
                     "layer.")
mlm_classifier.mlp.last_linear_layer.weight = \
    bert_model.bert.embeddings.word_embeddings.weight

Beispiel #5
0
def sentence_classification(args):
    # TODO: construct name of experiment based on args
    """
    name = construct_name(
            args.exp_name,
            args.lr,
            args.batch_size,
            args.num_epochs,
            args.weight_decay,
            args.optimizer)
    work_dir = name
    if args.work_dir:
        work_dir = os.path.join(args.work_dir, name)
    """
    # Instantiate neural modules
    nf = NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=args.work_dir,
        create_tb_writer=True,
        files_to_copy=[__file__],
        add_time_to_log_dir=True)

    # Pre-trained BERT
    tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model)

    if args.bert_checkpoint is None:
        bert = nemo_nlp.BERT(pretrained_model_name=args.pretrained_bert_model)
        # save bert config for inference after fine-tuning
        bert_config = bert.config.to_dict()
        with open(args.work_dir + '/' + args.pretrained_bert_model + '_config.json', 'w+') as json_file:
            json.dump(bert_config, json_file)
    else:
        if args.bert_config is not None:
            with open(args.bert_config) as json_file:
                bert_config = json.load(json_file)
        bert = nemo_nlp.BERT(**bert_config)
        bert.restore_from(args.bert_checkpoint)

    # MLP
    bert_hidden_size = bert.local_parameters['hidden_size']
    mlp = nemo_nlp.SequenceClassifier(
        hidden_size=bert_hidden_size,
        num_classes=args.num_classes,
        num_layers=args.num_layers,
        log_softmax=False,
        dropout=args.dropout)

    # TODO: save mlp/all model configs (bake in to Neural Module?)

    if args.mlp_checkpoint:
        mlp.restore_from(args.mlp_checkpoint)
    
    # Loss function for classification
    loss_fn = CrossEntropyLoss()

    # Data layers, pipelines, and callbacks
    callbacks = [] # callbacks depend on files present

    if args.train_file:
        if args.preproc:
            train_data_layer = preproc_data_layer.PreprocBertSentenceClassificationDataLayer(
            input_file=args.train_file,
            shuffle=True,
            num_samples=args.num_samples, # lower for dev, -1 for all dataset
            batch_size=args.batch_size,
            num_workers=0,
            local_rank=args.local_rank)

        else:
            train_data_layer = nemo_nlp.BertSentenceClassificationDataLayer(
            input_file=args.train_file,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            shuffle=True,
            num_samples=args.num_samples, # lower for dev, -1 for all dataset
            batch_size=args.batch_size,
            num_workers=0,
            local_rank=args.local_rank)

        train_logits, train_loss, steps_per_epoch, train_labels = create_pipeline(
            nf,
            train_data_layer,
            bert,
            mlp,
            loss_fn)

        train_callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[train_loss, train_logits],
            print_func=lambda x: nf.logger.info(f'Train loss: {str(np.round(x[0].item(), 3))}'),
            tb_writer=nf.tb_writer,
            get_tb_values=lambda x: [["train_loss", x[0]]],
            step_freq=steps_per_epoch)

        callbacks.append(train_callback)

        if args.num_checkpoints != 0:
            ckpt_callback = nemo.core.CheckpointCallback(
                folder=nf.checkpoint_dir,
                epoch_freq=args.save_epoch_freq,
                step_freq=args.save_step_freq,
                checkpoints_to_keep=args.num_checkpoints)
            
            callbacks.append(ckpt_callback)
        

    if args.eval_file:
        if args.preproc:
            eval_data_layer = preproc_data_layer.PreprocBertSentenceClassificationDataLayer(
            input_file=args.eval_file,
            shuffle=False,
            num_samples=args.num_samples,
            batch_size=args.batch_size,
            num_workers=0,
            local_rank=args.local_rank)
        
        else:
            eval_data_layer = nemo_nlp.BertSentenceClassificationDataLayer(
            input_file=args.eval_file,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            shuffle=False,
            num_samples=args.num_samples,
            batch_size=args.batch_size,
            num_workers=0,
            local_rank=args.local_rank)

        eval_logits, eval_loss, _, eval_labels = create_pipeline(
            nf,
            eval_data_layer,
            bert,
            mlp,
            loss_fn)
        
        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[eval_logits, eval_labels],
            user_iter_callback=lambda x, y: eval_iter_callback(
                x, y, eval_data_layer),
            user_epochs_done_callback=lambda x: eval_epochs_done_callback(
                x, f'{nf.work_dir}/graphs'),
            tb_writer=nf.tb_writer,
            eval_step=steps_per_epoch)

        callbacks.append(eval_callback)
    
    if args.inference_file:
        if args.preproc:
            inference_data_layer = preproc_data_layer.PreprocBertSentenceClassificationDataLayer(
            input_file=args.inference_file,
            shuffle=False,
            num_samples=args.num_samples,
            batch_size=args.batch_size,
            num_workers=0,
            local_rank=args.local_rank)
        
        else:
            inference_data_layer = nemo_nlp.BertSentenceClassificationDataLayer(
            input_file=args.inference_file,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            shuffle=False,
            num_samples=args.num_samples,
            batch_size=args.batch_size,
            num_workers=0,
            local_rank=args.local_rank)

        # TODO: Finish inference
        inference_callback = None 

    # Training, eval and inference
    if args.train_file:
        lr_policy_fn = get_lr_policy(
            args.lr_policy,
            total_steps=args.num_epochs * steps_per_epoch,
            warmup_ratio=args.lr_warmup_proportion)

        nf.train(
            tensors_to_optimize=[train_loss],
            callbacks=callbacks,
            lr_policy=lr_policy_fn,
            optimizer=args.optimizer_kind,
            optimization_params={'num_epochs': args.num_epochs, 'lr': args.lr})