Example #1
        user_iter_callback=lambda x, y: eval_iter_callback(x, y),
        user_epochs_done_callback=lambda x: eval_epochs_done_callback(
            x, label_ids, f'{nf.work_dir}/graphs'),
        tb_writer=nf.tb_writer,
        eval_step=args.eval_step_freq
        if args.eval_step_freq > 0 else steps_per_epoch,
    )
    callbacks.append(eval_callback)

ckpt_callback = nemo.core.CheckpointCallback(folder=nf.checkpoint_dir,
                                             epoch_freq=args.save_epoch_freq,
                                             step_freq=args.save_step_freq)
callbacks.append(ckpt_callback)

lr_policy_fn = get_lr_policy(args.lr_policy,
                             total_steps=args.num_epochs * steps_per_epoch,
                             warmup_ratio=args.lr_warmup_proportion)

nf.train(
    tensors_to_optimize=[train_loss],
    callbacks=callbacks,
    lr_policy=lr_policy_fn,
    batches_per_step=args.batches_per_step,
    optimizer=args.optimizer_kind,
    optimization_params={
        "num_epochs": args.num_epochs,
        "lr": args.lr,
        "weight_decay": args.weight_decay
    },
)
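
Example #1 opens mid-call: its first lines are the tail of a `nemo.core.EvaluatorCallback` constructor whose opening falls outside the excerpt. Judging from the fuller examples below, the complete construction would look roughly like the following sketch, in which `eval_tensors`, `eval_iter_callback`, `eval_epochs_done_callback`, `label_ids` and `steps_per_epoch` are assumed to be defined earlier in the script:

# Hedged sketch of the callback whose tail appears above; not the original code.
eval_callback = nemo.core.EvaluatorCallback(
    eval_tensors=eval_tensors,  # assumed: output tensors of the evaluation pipeline
    user_iter_callback=lambda x, y: eval_iter_callback(x, y),
    user_epochs_done_callback=lambda x: eval_epochs_done_callback(
        x, label_ids, f'{nf.work_dir}/graphs'),
    tb_writer=nf.tb_writer,
    eval_step=args.eval_step_freq
    if args.eval_step_freq > 0 else steps_per_epoch,
)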
Example #2
    def test_squad_v1(self):
        version_2_with_negative = False
        pretrained_bert_model = 'bert-base-uncased'
        batch_size = 3
        data_dir = os.path.abspath(
            os.path.join(os.path.dirname(__file__), 'data/nlp/squad/v1.1'))
        max_query_length = 64
        max_seq_length = 384
        doc_stride = 128
        max_steps = 100
        lr_warmup_proportion = 0
        eval_step_freq = 50
        lr = 3e-6
        do_lower_case = True
        n_best_size = 5
        max_answer_length = 20
        null_score_diff_threshold = 0.0

        tokenizer = nemo_nlp.NemoBertTokenizer(pretrained_bert_model)
        neural_factory = nemo.core.NeuralModuleFactory(
            backend=nemo.core.Backend.PyTorch,
            local_rank=None,
            create_tb_writer=False)
        model = nemo_nlp.huggingface.BERT(
            pretrained_model_name=pretrained_bert_model)
        hidden_size = model.local_parameters["hidden_size"]
        qa_head = nemo_nlp.TokenClassifier(hidden_size=hidden_size,
                                           num_classes=2,
                                           num_layers=1,
                                           log_softmax=False)
        squad_loss = nemo_nlp.QuestionAnsweringLoss()

        data_layer = nemo_nlp.BertQuestionAnsweringDataLayer(
            mode='train',
            version_2_with_negative=version_2_with_negative,
            batch_size=batch_size,
            tokenizer=tokenizer,
            data_dir=data_dir,
            max_query_length=max_query_length,
            max_seq_length=max_seq_length,
            doc_stride=doc_stride)

        input_ids, input_type_ids, input_mask, \
            start_positions, end_positions, _ = data_layer()

        hidden_states = model(input_ids=input_ids,
                              token_type_ids=input_type_ids,
                              attention_mask=input_mask)

        qa_output = qa_head(hidden_states=hidden_states)
        loss, _, _ = squad_loss(logits=qa_output,
                                start_positions=start_positions,
                                end_positions=end_positions)

        data_layer_eval = nemo_nlp.BertQuestionAnsweringDataLayer(
            mode='dev',
            version_2_with_negative=version_2_with_negative,
            batch_size=batch_size,
            tokenizer=tokenizer,
            data_dir=data_dir,
            max_query_length=max_query_length,
            max_seq_length=max_seq_length,
            doc_stride=doc_stride)
        input_ids_eval, input_type_ids_eval, input_mask_eval, \
            start_positions_eval, end_positions_eval, unique_ids_eval \
            = data_layer_eval()

        hidden_states_eval = model(input_ids=input_ids_eval,
                                   token_type_ids=input_type_ids_eval,
                                   attention_mask=input_mask_eval)

        qa_output_eval = qa_head(hidden_states=hidden_states_eval)
        _, start_logits_eval, end_logits_eval = squad_loss(
            logits=qa_output_eval,
            start_positions=start_positions_eval,
            end_positions=end_positions_eval)
        eval_output = [start_logits_eval, end_logits_eval, unique_ids_eval]

        callback_train = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: print("Loss: {:.3f}".format(x[0].item())),
            get_tb_values=lambda x: [["loss", x[0]]],
            step_freq=10,
            tb_writer=neural_factory.tb_writer)

        callbacks_eval = nemo.core.EvaluatorCallback(
            eval_tensors=eval_output,
            user_iter_callback=lambda x, y: eval_iter_callback(x, y),
            user_epochs_done_callback=lambda x: eval_epochs_done_callback(
                x,
                eval_data_layer=data_layer_eval,
                do_lower_case=do_lower_case,
                n_best_size=n_best_size,
                max_answer_length=max_answer_length,
                version_2_with_negative=version_2_with_negative,
                null_score_diff_threshold=null_score_diff_threshold),
            tb_writer=neural_factory.tb_writer,
            eval_step=eval_step_freq)

        lr_policy_fn = get_lr_policy('WarmupAnnealing',
                                     total_steps=max_steps,
                                     warmup_ratio=lr_warmup_proportion)

        neural_factory.train(tensors_to_optimize=[loss],
                             callbacks=[callback_train, callbacks_eval],
                             lr_policy=lr_policy_fn,
                             optimizer='adam_w',
                             optimization_params={
                                 "max_steps": max_steps,
                                 "lr": lr
                             })
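
Example #2 is a unittest-style test method. Assuming it is defined on a `unittest.TestCase` subclass, it can be run on its own; the module and class names below are illustrative, not taken from the source:

# Run just this test with the standard-library runner (names assumed):
#   python -m unittest tests.test_squad.TestSquad.test_squad_v1
# or select it with pytest:
#   pytest -k test_squad_v1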
Example #3
                eval_step=args.eval_step_freq,
            )
            callbacks.append(eval_callback)

        optimization_params = {
            "lr": args.lr,
            "weight_decay": args.weight_decay,
        }
        if args.max_steps < 0:
            total_steps = args.num_epochs * train_steps_per_epoch
            optimization_params['num_epochs'] = args.num_epochs
        else:
            total_steps = args.max_steps
            optimization_params['max_steps'] = args.max_steps

        lr_policy_fn = get_lr_policy(
            args.lr_policy, total_steps=total_steps, warmup_ratio=args.lr_warmup_proportion)

        if args.grad_norm_clip >= 0:
            optimization_params['grad_norm_clip'] = args.grad_norm_clip

        nf.train(
            tensors_to_optimize=[train_loss],
            callbacks=callbacks,
            lr_policy=lr_policy_fn,
            optimizer=args.optimizer,
            batches_per_step=args.batches_per_step,
            optimization_params=optimization_params,
        )

    else:
        load_from_folder = None
    user_epochs_done_callback=lambda x: eval_epochs_done_callback(
        x, validation_dataset=eval_dataset_tgt),
    eval_step=args.eval_freq,
    tb_writer=nf.tb_writer)

# callback which saves checkpoints once in a while
ckpt_dir = nf.checkpoint_dir if not args.interactive \
    else args.restore_checkpoint_from
ckpt_callback = nemo.core.CheckpointCallback(folder=ckpt_dir,
                                             epoch_freq=args.save_epoch_freq,
                                             step_freq=args.save_step_freq,
                                             checkpoints_to_keep=1)

# define learning rate decay policy
lr_policy_fn = get_lr_policy(args.lr_policy,
                             total_steps=args.max_steps,
                             warmup_steps=args.warmup_steps)

if args.max_steps is not None and args.num_epochs is not None:
    raise ValueError("Please specify either max_steps or num_epochs.")

if not args.interactive:

    if args.max_steps is not None:
        stop_training_condition = {"max_steps": args.max_steps}
    else:
        stop_training_condition = {"num_epochs": args.num_epochs}

    nf.train(tensors_to_optimize=[train_loss],
             callbacks=[train_callback, eval_callback, ckpt_callback],
             optimizer=args.optimizer,
Example #5
def sentence_classification(args):
    # TODO: construct name of experiment based on args
    """
    name = construct_name(
            args.exp_name,
            args.lr,
            args.batch_size,
            args.num_epochs,
            args.weight_decay,
            args.optimizer)
    work_dir = name
    if args.work_dir:
        work_dir = os.path.join(args.work_dir, name)
    """
    # Instantiate neural modules
    nf = NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=args.work_dir,
        create_tb_writer=True,
        files_to_copy=[__file__],
        add_time_to_log_dir=True)

    # Pre-trained BERT
    tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model)

    if args.bert_checkpoint is None:
        bert = nemo_nlp.BERT(pretrained_model_name=args.pretrained_bert_model)
        # save bert config for inference after fine-tuning
        bert_config = bert.config.to_dict()
        with open(args.work_dir + '/' + args.pretrained_bert_model + '_config.json', 'w+') as json_file:
            json.dump(bert_config, json_file)
    else:
        if args.bert_config is not None:
            with open(args.bert_config) as json_file:
                bert_config = json.load(json_file)
        bert = nemo_nlp.BERT(**bert_config)
        bert.restore_from(args.bert_checkpoint)

    # MLP
    bert_hidden_size = bert.local_parameters['hidden_size']
    mlp = nemo_nlp.SequenceClassifier(
        hidden_size=bert_hidden_size,
        num_classes=args.num_classes,
        num_layers=args.num_layers,
        log_softmax=False,
        dropout=args.dropout)

    # TODO: save mlp/all model configs (bake in to Neural Module?)

    if args.mlp_checkpoint:
        mlp.restore_from(args.mlp_checkpoint)
    
    # Loss function for classification
    loss_fn = CrossEntropyLoss()

    # Data layers, pipelines, and callbacks
    callbacks = [] # callbacks depend on files present

    if args.train_file:
        if args.preproc:
            train_data_layer = preproc_data_layer.PreprocBertSentenceClassificationDataLayer(
                input_file=args.train_file,
                shuffle=True,
                num_samples=args.num_samples,  # lower for dev runs, -1 for the full dataset
                batch_size=args.batch_size,
                num_workers=0,
                local_rank=args.local_rank)

        else:
            train_data_layer = nemo_nlp.BertSentenceClassificationDataLayer(
                input_file=args.train_file,
                tokenizer=tokenizer,
                max_seq_length=args.max_seq_length,
                shuffle=True,
                num_samples=args.num_samples,  # lower for dev runs, -1 for the full dataset
                batch_size=args.batch_size,
                num_workers=0,
                local_rank=args.local_rank)

        train_logits, train_loss, steps_per_epoch, train_labels = create_pipeline(
            nf,
            train_data_layer,
            bert,
            mlp,
            loss_fn)

        train_callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[train_loss, train_logits],
            print_func=lambda x: nf.logger.info(f'Train loss: {str(np.round(x[0].item(), 3))}'),
            tb_writer=nf.tb_writer,
            get_tb_values=lambda x: [["train_loss", x[0]]],
            step_freq=steps_per_epoch)

        callbacks.append(train_callback)

        if args.num_checkpoints != 0:
            ckpt_callback = nemo.core.CheckpointCallback(
                folder=nf.checkpoint_dir,
                epoch_freq=args.save_epoch_freq,
                step_freq=args.save_step_freq,
                checkpoints_to_keep=args.num_checkpoints)
            
            callbacks.append(ckpt_callback)
        

    if args.eval_file:
        if args.preproc:
            eval_data_layer = preproc_data_layer.PreprocBertSentenceClassificationDataLayer(
                input_file=args.eval_file,
                shuffle=False,
                num_samples=args.num_samples,
                batch_size=args.batch_size,
                num_workers=0,
                local_rank=args.local_rank)
        
        else:
            eval_data_layer = nemo_nlp.BertSentenceClassificationDataLayer(
                input_file=args.eval_file,
                tokenizer=tokenizer,
                max_seq_length=args.max_seq_length,
                shuffle=False,
                num_samples=args.num_samples,
                batch_size=args.batch_size,
                num_workers=0,
                local_rank=args.local_rank)

        eval_logits, eval_loss, _, eval_labels = create_pipeline(
            nf,
            eval_data_layer,
            bert,
            mlp,
            loss_fn)
        
        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[eval_logits, eval_labels],
            user_iter_callback=lambda x, y: eval_iter_callback(
                x, y, eval_data_layer),
            user_epochs_done_callback=lambda x: eval_epochs_done_callback(
                x, f'{nf.work_dir}/graphs'),
            tb_writer=nf.tb_writer,
            eval_step=steps_per_epoch)

        callbacks.append(eval_callback)
    
    if args.inference_file:
        if args.preproc:
            inference_data_layer = preproc_data_layer.PreprocBertSentenceClassificationDataLayer(
                input_file=args.inference_file,
                shuffle=False,
                num_samples=args.num_samples,
                batch_size=args.batch_size,
                num_workers=0,
                local_rank=args.local_rank)
        
        else:
            inference_data_layer = nemo_nlp.BertSentenceClassificationDataLayer(
                input_file=args.inference_file,
                tokenizer=tokenizer,
                max_seq_length=args.max_seq_length,
                shuffle=False,
                num_samples=args.num_samples,
                batch_size=args.batch_size,
                num_workers=0,
                local_rank=args.local_rank)

        # TODO: Finish inference
        inference_callback = None 

    # Training, eval and inference
    if args.train_file:
        lr_policy_fn = get_lr_policy(
            args.lr_policy,
            total_steps=args.num_epochs * steps_per_epoch,
            warmup_ratio=args.lr_warmup_proportion)

        nf.train(
            tensors_to_optimize=[train_loss],
            callbacks=callbacks,
            lr_policy=lr_policy_fn,
            optimizer=args.optimizer_kind,
            optimization_params={'num_epochs': args.num_epochs, 'lr': args.lr})
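
Example #5 leaves its inference branch unfinished (`inference_callback = None` under `# TODO: Finish inference`). One plausible way to complete it in the NeMo 0.x API used here is to build a pipeline over `inference_data_layer` and evaluate its logits with `NeuralModuleFactory.infer`. The lines below are only a hedged sketch meant to sit inside the `if args.inference_file:` branch; the tensor names are illustrative:

# Hedged sketch, not part of the original script.
# create_pipeline is assumed to return (logits, loss, steps_per_epoch, labels),
# as it does for the train and eval branches above.
infer_logits, _, _, _ = create_pipeline(
    nf, inference_data_layer, bert, mlp, loss_fn)
evaluated_tensors = nf.infer(tensors=[infer_logits])
# evaluated_tensors[0] now holds the per-batch logits for args.inference_file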
Example #6
    get_tb_values=lambda x: [["train_loss", x[0]]],
    step_freq=steps_per_epoch)

eval_callback = nemo.core.EvaluatorCallback(
    eval_tensors=[val_logits, val_labels],
    user_iter_callback=lambda x, y: eval_iter_callback(x, y, val_data),
    user_epochs_done_callback=lambda x: eval_epochs_done_callback(
        x, f'{nf.work_dir}/graphs'),
    tb_writer=nf.tb_writer,
    eval_step=steps_per_epoch)

ckpt_callback = nemo.core.CheckpointCallback(folder=nf.checkpoint_dir,
                                             epoch_freq=1)

lr_policy_fn = get_lr_policy('WarmupAnnealing',
                             total_steps=NUM_EPOCHS * steps_per_epoch,
                             warmup_ratio=0.1)

nf.train(
    tensors_to_optimize=[train_loss],
    callbacks=[train_callback, eval_callback, ckpt_callback],
    lr_policy=lr_policy_fn,
    optimizer=OPTIMIZER,
    optimization_params={
        "num_epochs": NUM_EPOCHS,
        "lr": LEARNING_RATE,
        "weight_decay": WEIGHT_DECAY
    },
)

# TODO: ask whether an inference file should be created from the test set