Example #1
    def build_training_data_loader(self):
        train_dataset, _, _ = data.load_and_cache_examples(
            tokenizer=self.tokenizer,
            task=self.context.get_data_config().get("task"),
            max_seq_length=self.context.get_hparam("max_seq_length"),
            doc_stride=self.context.get_hparam("doc_stride"),
            max_query_length=self.context.get_hparam("max_query_length"),
            evaluate=False,
        )
        return DataLoader(train_dataset, batch_size=self.context.get_per_slot_batch_size())
Example #2
    def build_training_data_loader(self) -> DataLoader:
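        # Download the raw dataset once, then build (or reuse) the cached
        # training features and wrap them in a DataLoader sized per slot.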
        if not self.data_downloaded:
            self.download_dataset()

        train_dataset = data.load_and_cache_examples(
            base_data_dir=self.download_directory,
            config=self.context.get_data_config(),
            model_type=self.context.get_hparam("model_type"),
            max_seq_length=self.context.get_hparam("max_seq_length"),
            evaluate=False,
        )
        return DataLoader(train_dataset, batch_size=self.context.get_per_slot_batch_size())
Example #3
def evaluate(args, model, tokenizer, prefix=""):
    # Evaluate language-modeling loss on the evaluation set and report perplexity.
    eval_output_dir = args.output_dir

    eval_dataset = load_and_cache_examples(args, tokenizer, evaluate=True)

    if not os.path.exists(eval_output_dir):
        os.makedirs(eval_output_dir)

    # Evaluate in a fixed order using SequentialSampler.
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = batch.to(args.device)

        with torch.no_grad():
            outputs = model(batch, labels=batch)
            lm_loss = outputs[0]
            eval_loss += lm_loss.mean().item()
        nb_eval_steps += 1

    eval_loss = eval_loss / nb_eval_steps
    perplexity = torch.exp(torch.tensor(eval_loss))

    result = {"perplexity": perplexity}

    output_eval_file = os.path.join(eval_output_dir, prefix,
                                    "eval_results.txt")
    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

    return result
Example #4
def train(args):
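    # Fine-tune a question-answering model: load a pretrained config/tokenizer/model,
    # build the SQuAD-style training DataLoader, and run an AdamW + linear-warmup
    # loop with gradient accumulation, periodic logging/evaluation, and checkpointing.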
    logger.info("Creating config and model")
    config = txf.AutoConfig.from_pretrained(args.config_name)
    tokenizer = txf.AutoTokenizer.from_pretrained(
        args.config_name,
        do_lower_case=args.uncased_model,
    )
    model = txf.AutoModelForQuestionAnswering.from_pretrained(
        args.config_name,
        from_tf=bool(".ckpt" in args.config_name),
        config=config,
    )

    # TODO: Multi-GPU
    device = torch.device(
        "cuda" if torch.cuda.is_available() and args.num_gpus else "cpu")

    logger.info("Loading model to %s", device)
    model.to(device)

    logger.info("Creating data loader")
    train_dataset = data.load_and_cache_examples(
        args.train,
        tokenizer,
        args,
        evaluate=False,
        output_examples=False,
    )
    train_dataloader = data.get_dataloader(train_dataset,
                                           args.per_gpu_train_batch_size,
                                           evaluate=False)

    #     if args.max_steps > 0:
    #         t_total = args.max_steps
    #         args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.grad_acc_steps) + 1
    #     else:
    t_total = len(train_dataloader) // args.grad_acc_steps * args.epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.wd,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        },
    ]
    optimizer = txf.AdamW(optimizer_grouped_parameters,
                          lr=args.lr,
                          eps=args.adam_epsilon)
    scheduler = txf.get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)

    ## Train!
    logger.info("Training model")
    global_step = 1
    epochs_trained = 0
    steps_trained_in_current_epoch = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()

    for epoch in range(args.epochs):
        logger.info(f"[Epoch {epoch}] Starting")
        for step, batch in enumerate(train_dataloader):
            model.train()
            batch = tuple(t.to(device) for t in batch)
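            # SQuAD training features are packed positionally: input_ids, attention_mask,
            # token_type_ids, start/end positions (plus cls_index/p_mask for XLNet/XLM).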

            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "start_positions": batch[3],
                "end_positions": batch[4],
            }

            if args.model_type in [
                    "xlm", "roberta", "distilbert", "camembert"
            ]:
                del inputs["token_type_ids"]

            if args.model_type in ["xlnet", "xlm"]:
                inputs.update({"cls_index": batch[5], "p_mask": batch[6]})
                if args.has_unanswerable:
                    inputs.update({"is_impossible": batch[7]})
                if hasattr(model, "config") and hasattr(
                        model.config, "lang2id"):
                    inputs.update({
                        "langs":
                        (torch.ones(batch[0].shape, dtype=torch.int64) *
                         args.lang_id).to(device),
                    })

            outputs = model(**inputs)
            # model outputs are always tuple in transformers (see doc)
            loss = outputs[0]

            if args.grad_acc_steps > 1:
                loss = loss / args.grad_acc_steps

            loss.backward()

            tr_loss += loss.item()
            if (step + 1) % args.grad_acc_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.max_grad_norm)

                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                # Log metrics
                if (args.log_interval > 0
                        and global_step % args.log_interval == 0):
                    logstr = "lr={:.5e}; loss={:.5e};".format(
                        scheduler.get_lr()[0],
                        (tr_loss - logging_loss) / args.log_interval)

                    # Only evaluate when single GPU otherwise metrics may not average well
                    if args.validation:
                        logger.info(
                            f"[Epoch {epoch} Global Step {global_step}] Starting evaluation..."
                        )
                        results = evaluate(args,
                                           model,
                                           tokenizer,
                                           device,
                                           prefix=global_step)
                        for key, value in results.items():
                            logstr += f" eval_{key}={value:.5e};"

                    logger.info(
                        f"[Epoch {epoch} Global Step {global_step}] Metrics: {logstr}"
                    )
                    logging_loss = tr_loss

                # Save model checkpoint
                if args.checkpoint_interval > 0 and global_step % args.checkpoint_interval == 0:
                    save_progress(model,
                                  tokenizer,
                                  args,
                                  checkpoint=global_step,
                                  optimizer=optimizer,
                                  scheduler=scheduler)

            if args.max_steps > 0 and global_step > args.max_steps:
                break
        if args.max_steps > 0 and global_step > args.max_steps:
            break

    logger.info("Training complete: Saving model")
    save_progress(model, tokenizer, args)
    return
Example #5
def evaluate(args, model, tokenizer, device, prefix=""):
    eval_dataset, examples, features = data.load_and_cache_examples(
        args.validation,
        tokenizer,
        args,
        evaluate=True,
        output_examples=True,
    )
    eval_dataloader = data.get_dataloader(eval_dataset,
                                          args.per_gpu_eval_batch_size,
                                          evaluate=True)

    all_results = []
    start_time = timeit.default_timer()
    eval_batches = 0

    for batch in eval_dataloader:
        model.eval()
        batch = tuple(t.to(device) for t in batch)
        eval_batches += 1

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
            }

            if args.model_type in [
                    "xlm", "roberta", "distilbert", "camembert"
            ]:
                del inputs["token_type_ids"]

            feature_indices = batch[3]

            # XLNet and XLM use more arguments for their predictions
            if args.model_type in ["xlnet", "xlm"]:
                inputs.update({"cls_index": batch[4], "p_mask": batch[5]})
                # for lang_id-sensitive xlm models
                if hasattr(model, "config") and hasattr(
                        model.config, "lang2id"):
                    inputs.update({
                        "langs":
                        (torch.ones(batch[0].shape, dtype=torch.int64) *
                         args.lang_id).to(device)
                    })

            outputs = model(**inputs)

        for i, feature_index in enumerate(feature_indices):
            # TODO: i and feature_index are the same number! Simplify by removing enumerate?
            eval_feature = features[feature_index.item()]
            unique_id = int(eval_feature.unique_id)

            output = [to_list(output[i]) for output in outputs]

            # Some models (XLNet, XLM) return five prediction outputs, while the
            # other "simpler" models return only two.
            if len(output) >= 5:
                start_logits = output[0]
                start_top_index = output[1]
                end_logits = output[2]
                end_top_index = output[3]
                cls_logits = output[4]

                result = SquadResult(
                    unique_id,
                    start_logits,
                    end_logits,
                    start_top_index=start_top_index,
                    end_top_index=end_top_index,
                    cls_logits=cls_logits,
                )

            else:
                start_logits, end_logits = output
                result = SquadResult(unique_id, start_logits, end_logits)

            all_results.append(result)

    evalTime = timeit.default_timer() - start_time
    logger.info("  Evaluation done in total %f secs (%f sec per example)",
                evalTime,
                evalTime / (eval_batches * args.per_gpu_eval_batch_size))

    # Compute predictions
    output_prediction_file = os.path.join(args.output_data_dir,
                                          "predictions_{}.json".format(prefix))
    output_nbest_file = os.path.join(
        args.output_data_dir, "nbest_predictions_{}.json".format(prefix))

    if args.has_unanswerable:
        output_null_log_odds_file = os.path.join(
            args.output_data_dir, "null_odds_{}.json".format(prefix))
    else:
        output_null_log_odds_file = None

    # XLNet and XLM use a more complex post-processing procedure
    if args.model_type in ["xlnet", "xlm"]:
        start_n_top = model.config.start_n_top if hasattr(
            model, "config") else model.module.config.start_n_top
        end_n_top = model.config.end_n_top if hasattr(
            model, "config") else model.module.config.end_n_top

        predictions = squad_metrics.compute_predictions_log_probs(
            examples,
            features,
            all_results,
            args.n_best_size,
            args.max_answer_len,
            output_prediction_file,
            output_nbest_file,
            output_null_log_odds_file,
            start_n_top,
            end_n_top,
            args.has_unanswerable,
            tokenizer,
            logger.level < logging.INFO,
        )
    else:
        predictions = squad_metrics.compute_predictions_logits(
            examples,
            features,
            all_results,
            args.n_best_size,
            args.max_answer_len,
            args.uncased_model,
            output_prediction_file,
            output_nbest_file,
            output_null_log_odds_file,
            logger.level < logging.INFO,
            args.has_unanswerable,
            args.null_score_diff_thresh,
            tokenizer,
        )

    # Compute the F1 and exact scores.
    results = squad_metrics.squad_evaluate(examples, predictions)
    return results
Example #6
    def __init__(self, context: det.TrialContext):
        self.context = context
        self.config_class, self.tokenizer_class, self.model_class = constants.MODEL_CLASSES[
            self.context.get_hparam("model_type")
        ]
        self.tokenizer = self.tokenizer_class.from_pretrained(
            self.context.get_data_config().get("pretrained_model_name"),
            do_lower_case=True,
            cache_dir=None,
        )
        self.validation_dataset, self.validation_examples, self.validation_features = data.load_and_cache_examples(
            tokenizer=self.tokenizer,
            task=self.context.get_data_config().get("task"),
            max_seq_length=self.context.get_hparam("max_seq_length"),
            doc_stride=self.context.get_hparam("doc_stride"),
            max_query_length=self.context.get_hparam("max_query_length"),
            evaluate=True,
        )
Example #7
    def build_validation_data_loader(self):
        self.validation_dataset, self.validation_examples, self.validation_features = data.load_and_cache_examples(
            data_dir=self.download_directory,
            tokenizer=self.tokenizer,
            task=self.context.get_data_config().get("task"),
            max_seq_length=self.context.get_hparam("max_seq_length"),
            doc_stride=self.context.get_hparam("doc_stride"),
            max_query_length=self.context.get_hparam("max_query_length"),
            evaluate=True,
        )
        return DataLoader(
            self.validation_dataset,
            batch_size=self.context.get_per_slot_batch_size(),
        )
Example #8
def main():
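    # Entry point for fine-tuning GPT-2 as a language model: parse arguments,
    # build the model/tokenizer, optionally train, save with save_pretrained(),
    # and evaluate perplexity on the saved checkpoints.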
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument("--train_data_file",
                        default=None,
                        type=str,
                        required=True,
                        help="The input training data file (a text file).")
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model predictions and checkpoints will be written."
    )

    # Other parameters
    parser.add_argument(
        "--eval_data_file",
        default=None,
        type=str,
        help=
        "An optional input evaluation data file to evaluate the perplexity on (a text file)."
    )

    # Available GPT-2 models: gpt2, gpt2-medium, gpt2-large
    parser.add_argument(
        "--model_name_or_path",
        default="gpt2",
        type=str,
        help="The model checkpoint for weights initialization.")

    parser.add_argument(
        "--config_name",
        default="",
        type=str,
        help=
        "Optional pretrained config name or path if not the same as model_name_or_path"
    )
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help=
        "Optional pretrained tokenizer name or path if not the same as model_name_or_path"
    )
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help=
        "Optional directory to store the pre-trained models downloaded from s3 (instead of the default one)"
    )
    parser.add_argument(
        "--block_size",
        default=-1,
        type=int,
        help="Optional input sequence length after tokenization."
        "The training dataset will be truncated in block of this size for training."
        "Default to the model max input length for single sentence inputs (take into account special tokens)."
    )
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--evaluate_during_training",
        action='store_true',
        help="Run evaluation during training at each logging step.")
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help="Set this flag if you are using an uncased model.")

    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay",
                        default=0.0,
                        type=float,
                        help="Weight deay if we apply some.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs",
                        default=1.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help=
        "If > 0: set total number of training steps to perform. Override num_train_epochs."
    )
    parser.add_argument("--warmup_steps",
                        default=0,
                        type=int,
                        help="Linear warmup over warmup_steps.")

    parser.add_argument("--train_batch_size",
                        default=4,
                        type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--eval_batch_size",
                        default=4,
                        type=int,
                        help="Batch size per GPU/CPU for evaluation.")

    parser.add_argument('--logging_steps',
                        type=int,
                        default=50,
                        help="Log every X updates steps.")
    parser.add_argument('--save_steps',
                        type=int,
                        default=50,
                        help="Save checkpoint every X updates steps.")
    parser.add_argument(
        '--save_total_limit',
        type=int,
        default=None,
        help=
        'Limit the total number of checkpoints; the oldest checkpoints in output_dir are deleted. No deletion by default.'
    )
    parser.add_argument(
        "--eval_all_checkpoints",
        action='store_true',
        help=
        "Evaluate all checkpoints starting with the same prefix as model_name_or_path ending and ending with step number"
    )
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Avoid using CUDA when available")
    parser.add_argument('--overwrite_output_dir',
                        action='store_true',
                        help="Overwrite the content of the output directory")
    parser.add_argument(
        '--overwrite_cache',
        action='store_true',
        help="Overwrite the cached training and evaluation sets")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")

    parser.add_argument(
        '--fp16',
        action='store_true',
        help=
        "Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit"
    )
    parser.add_argument(
        '--fp16_opt_level',
        type=str,
        default='O1',
        help=
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html")
    parser.add_argument('--server_ip',
                        type=str,
                        default='',
                        help="For distant debugging.")
    parser.add_argument('--server_port',
                        type=str,
                        default='',
                        help="For distant debugging.")
    args = parser.parse_args()

    if args.eval_data_file is None and args.do_eval:
        raise ValueError(
            "Cannot do evaluation without an evaluation data file. Either supply a file to --eval_data_file "
            "or remove the --do_eval argument.")

    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir
    ) and args.do_train and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    args.n_gpu = torch.cuda.device_count()
    args.device = device

    # Setup logging
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger.warning("device: %s, n_gpu: %s, 16-bits training: %s", device,
                   args.n_gpu, args.fp16)

    # Set seed
    set_seed(args)

    config_class, model_class, tokenizer_class = (GPT2Config, GPT2LMHeadModel,
                                                  GPT2Tokenizer)
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path)
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case)
    if args.block_size <= 0:
        # Our input block size will be the max possible for the model
        args.block_size = tokenizer.max_len_single_sentence
        print("Block size:", args.block_size)
    args.block_size = min(args.block_size, tokenizer.max_len_single_sentence)
    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool('.ckpt' in args.model_name_or_path),
        config=config)
    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args,
                                                tokenizer,
                                                evaluate=False)

        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Saving best-practices: if you use save_pretrained for the model and tokenizer, you can reload them using from_pretrained()
    if args.do_train:
        # Create output directory if needed
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        # Take care of distributed/parallel training
        model_to_save = model.module if hasattr(model, 'module') else model
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))

        # Load a trained model and vocabulary that you have fine-tuned
        model = model_class.from_pretrained(args.output_dir)
        tokenizer = tokenizer_class.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        model.to(args.device)

    # Evaluation
    results = {}
    if args.do_eval:
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME,
                              recursive=True)))
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split(
                '-')[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split(
                '/')[-1] if checkpoint.find('checkpoint') != -1 else ""

            model = model_class.from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args, model, tokenizer, prefix=prefix)
            result = dict(
                (k + '_{}'.format(global_step), v) for k, v in result.items())
            results.update(result)

    return results
Example #9
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--data_dir", default=None, type=str, required=True,
                        help="The input data dir. Should contain the .tsv files (or other data files) for the task.")

    ## Other parameters
    parser.add_argument("--model_path", default="bert-base-uncased", type=str,
                        help="Pre-trained BERT model to extend e.g. bert-base-uncased.")
    parser.add_argument("--model", default=None, type=str,
                        help="Type of speech grader model: ['lstm', 'bert']")
    parser.add_argument("--max_score", default=6, type=float,
                         help="Maximum score that an example can be awarded (the default value used is 6, inline with CEFR levels).")
    parser.add_argument('--special_tokens', default=[], type=str, action='append',
                        help='Special tokens to mask when making auxiliary objective predictions. These are also denoted as special tokens for the BERT tokenizer.')

    parser.add_argument("--max_seq_length", default=128, type=int,
                        help="The maximum total input sequence length after tokenization. Sequences longer "
                             "than this will be truncated, sequences shorter will be padded.")
    parser.add_argument("--do_train", action='store_true',
                        help="Train a model.")
    parser.add_argument("--do_test", action='store_true',
                        help="Evaluate the model at --model_dir on the test set.")
    parser.add_argument('--logging_steps', type=int, default=50,
                        help="Log every X updates steps.")
    parser.add_argument("--evaluate_during_training", action='store_true',
                        help="Run evaluation during training at each logging step.")
    parser.add_argument('--save_best_on_evaluate', action='store_true',
                        help="Save best model based on evaluation scoring loss.")
    parser.add_argument("--save_all_checkpoints", action='store_true',
                       help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number")

    parser.add_argument("--train_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight deay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=3.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")

    parser.add_argument("--output_dir", default=None, type=str,
                        help="The output directory where the model predictions and checkpoints will be written during training.")
    parser.add_argument("--model_dir", default=None, type=str,
                        help="The directory where the model files are stored (used for testing).")
    parser.add_argument("--model_args_dir", default=None, type=str,
                        help="The directory where the model args are stored (used for testing).")
    parser.add_argument('--overwrite_cache', action='store_true',
                       help="Overwrite the cached training, validation and testing sets")
    parser.add_argument('--seed', type=int, default=42,
                        help="random seed for initialization")
    parser.add_argument('--predictions-file', type=str, default=None)

    # Auxiliary objectives
    for obj in aux_objs:
        parser.add_argument('--use_{}_objective'.format(obj), action='store_true',
                            help='Use {} objective during training. --{}_alpha must also be set'.format(obj, obj))
        parser.add_argument('--{}_alpha'.format(obj), type=float, default=0.0,
                            help='Weighting of {} objective in loss score. All alpha values must add to 1'.format(obj))
    parser.add_argument('--score_alpha', type=float, default=1.0,
                        help='Weighting of scoring objective in loss score. All alpha values must add to 1')


    # Parsing args
    args = parser.parse_args()
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if args.save_best_on_evaluate and not args.evaluate_during_training:
        logger.info('Cannot save best model if not evaluating')
        return

    # Setup logging
    logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt = '%m/%d/%Y %H:%M:%S',
                        level = logging.INFO)

    # Train a model
    if args.do_train:
        # Store training arguments to facilitate reloading a model.
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))
        args.logger = logger

        if not args.model:
            args.logger.info("--model must be provided for training (['lstm', 'bert'])")
            return
        if args.model == 'lstm':
            vocab = data.load_and_cache_vocab(args.data_dir, logger)
            training_objectives = get_auxiliary_objectives(args, len(vocab))
            grader = lstm_model.SpeechGraderModel(args, vocab, training_objectives).to(args.device)
            train_data = data.load_and_cache_examples(
                args.model, args.data_dir, args.max_seq_length, args.special_tokens,
                logger, vocab=vocab, reload=args.overwrite_cache)
            dev_data = data.load_and_cache_examples(
                args.model, args.data_dir, args.max_seq_length, args.special_tokens,
                logger, vocab=vocab, evaluate=True, reload=args.overwrite_cache)
            trainer = train.Trainer(args, grader, training_objectives)
        elif args.model == 'bert':
            tokenizer = BertTokenizer.from_pretrained(args.model_path, additional_special_tokens=args.special_tokens)
            config = BertConfig.from_pretrained(args.model_path)
            training_objectives = get_auxiliary_objectives(args, tokenizer.vocab_size)
            config.training_objectives = training_objectives
            config.max_score = args.max_score
            grader = bert_model.SpeechGraderModel(config=config).to(args.device)
            train_data = data.load_and_cache_examples(
                args.model, args.data_dir, args.max_seq_length, args.special_tokens,
                logger, tokenizer=tokenizer, reload=args.overwrite_cache)
            dev_data = data.load_and_cache_examples(
                args.model, args.data_dir, args.max_seq_length, args.special_tokens,
                logger, tokenizer=tokenizer, evaluate=True, reload=args.overwrite_cache)
            trainer = train.Trainer(args, grader, training_objectives, bert_tokenizer=tokenizer)
        else:
            args.logger.info("--model must be either 'lstm' or 'bert'")
            return
        trainer.train(train_data, dev_data)

    # Test a model
    if args.do_test:
        # Retrieve training arguments to facilitate reloading the model.
        args.logger = logger
        train_args = torch.load(os.path.join(args.model_args_dir, 'training_args.bin'))
        train_args.predictions_file = args.predictions_file
        train_args.logger = logger

        if train_args.model == 'lstm':
            # use the vocabulary from train time
            vocab = data.load_and_cache_vocab(train_args.data_dir, logger)
            training_objectives = get_auxiliary_objectives(train_args, len(vocab))
            grader = lstm_model.SpeechGraderModel(args, vocab, training_objectives).to(args.device)
            grader.load_state_dict(torch.load(os.path.join(args.model_dir, 'lstm.model')))
            test_data = data.load_and_cache_examples(
                train_args.model, args.data_dir, train_args.max_seq_length, train_args.special_tokens,
                logger, vocab=vocab, test=True, reload=args.overwrite_cache)
            trainer = train.Trainer(train_args, grader, training_objectives)

        else:
            tokenizer = BertTokenizer.from_pretrained(args.model_dir, do_lower_case=True)
            training_objectives = get_auxiliary_objectives(train_args, tokenizer.vocab_size)
            config = BertConfig.from_pretrained(args.model_dir)
            grader = bert_model.SpeechGraderModel.from_pretrained(args.model_dir, config=config).to(args.device)
            test_data = data.load_and_cache_examples(
                train_args.model, args.data_dir, train_args.max_seq_length, train_args.special_tokens,
                logger, tokenizer=tokenizer, test=True, reload=args.overwrite_cache)
            trainer = train.Trainer(train_args, grader, training_objectives, bert_tokenizer=tokenizer)
        trainer.test(test_data)
Example #10
def main():
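    # GLUE fine-tuning entry point: set up (optionally distributed) devices, load
    # the pretrained classifier and tokenizer, train, save with save_pretrained(),
    # and evaluate every saved checkpoint.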
    args = parse_args()

    if os.path.exists(args.output_data_dir) and os.listdir(
            args.output_data_dir
    ) and args.do_train and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_data_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl')
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank, device, args.n_gpu, bool(args.local_rank != -1),
        args.fp16)

    # Set seed
    set_seed(args)

    # Prepare GLUE task
    args.task_name = args.task_name.lower()
    if args.task_name not in processors:
        raise ValueError("Task not found: %s" % (args.task_name))
    processor = processors[args.task_name]()
    args.output_mode = output_modes[args.task_name]
    label_list = processor.get_labels()
    num_labels = len(label_list)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    print(MODEL_CLASSES[args.model_type])
    config = config_class.from_pretrained(args.config_name,
                                          num_labels=num_labels,
                                          finetuning_task=args.task_name)
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name, do_lower_case=args.do_lower_case)
    model = model_class.from_pretrained(args.model_name,
                                        from_tf=False,
                                        config=config)

    if args.local_rank == 0:
        torch.distributed.barrier(
        )  # Rank 0 has finished loading; release the other processes waiting at the barrier above

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # NOTE: training always runs in this example; args.do_train is only printed.
    print(args.do_train)
    train_flag = True
    if train_flag:
        train_dataset = data.load_and_cache_examples(args,
                                                     args.task_name,
                                                     tokenizer,
                                                     evaluate=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
    if train_flag and (args.local_rank == -1
                       or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_data_dir) and args.local_rank in [
                -1, 0
        ]:
            os.makedirs(args.output_data_dir)

        logger.info("Saving model checkpoint to %s", args.output_data_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = model.module if hasattr(
            model,
            'module') else model  # Take care of distributed/parallel training
        #model_to_save.save_pretrained(args.output_data_dir)
        #tokenizer.save_pretrained(args.output_data_dir)
        model_to_save.save_pretrained(args.model_dir)
        tokenizer.save_pretrained(args.model_dir)

        # Good practice: save your training arguments together with the trained model
        #torch.save(args, os.path.join(args.output_data_dir, 'training_args.bin'))
        torch.save(args, os.path.join(args.model_dir, 'training_args.bin'))

        # Load a trained model and vocabulary that you have fine-tuned
        model = model_class.from_pretrained(args.model_dir)
        tokenizer = tokenizer_class.from_pretrained(args.model_dir)
        model.to(args.device)

    # Evaluation (these flags are hardcoded on in this example)
    eval_flag = True
    eval_all_checkpoints = True
    do_lower_case = True
    results = {}
    if eval_flag and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(
            args.model_dir, do_lower_case=do_lower_case)
        logger.info("******Output dir folders: %s",
                    os.listdir(args.output_data_dir))
        checkpoints = [args.output_data_dir]
        if eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_data_dir + '/**/' + WEIGHTS_NAME,
                              recursive=True)))
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split(
                '-')[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split(
                '/')[-1] if checkpoint.find('checkpoint') != -1 else ""

            model = model_class.from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args, model, tokenizer, prefix=prefix)
            result = dict(
                (k + '_{}'.format(global_step), v) for k, v in result.items())
            results.update(result)

    print(results)
Example #11
def evaluate(args, model, tokenizer, prefix=""):
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_task_names = ("mnli", "mnli-mm") if args.task_name == "mnli" else (
        args.task_name, )
    eval_outputs_dirs = (args.output_data_dir, args.output_data_dir +
                         '-MM') if args.task_name == "mnli" else (
                             args.output_data_dir, )

    results = {}
    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        eval_dataset = data.load_and_cache_examples(args,
                                                    eval_task,
                                                    tokenizer,
                                                    evaluate=True)

        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(
            1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)

        # multi-gpu eval
        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)

        print("***** Running evaluation {} *****".format(prefix))
        print("  Num examples = %d", len(eval_dataset))
        print("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)

            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                    'labels': batch[3]
                }
                if args.model_type != 'distilbert':
                    inputs['token_type_ids'] = batch[2] if args.model_type in [
                        'bert', 'xlnet'
                    ] else None  # XLM, DistilBERT and RoBERTa don't use segment_ids
                outputs = model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs['labels'].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps

        if args.output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif args.output_mode == "regression":
            preds = np.squeeze(preds)
        # result = compute_metrics(eval_task, preds, out_label_ids)
        result = compute_metrics("sst-2", preds, out_label_ids)  # metric task hardcoded to SST-2 in this example
        results.update(result)

        output_eval_file = os.path.join(eval_output_dir, prefix,
                                        "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results {} *****".format(prefix))
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    return results