Code Example #1
    def __init__(self, config):
        with open(config, "r") as stream:
            hyper_params = load(stream, Loader=yaml.FullLoader)

        ckpt_to_resume = hyper_params["ckpt_to_resume"]
        tokenizer_name = hyper_params["tokenizer_name"]
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        model = load_model(hyper_params, tokenizer, False)

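        # Only the model matters at inference time; the dataloader, loss and
        # optimizer slots are filled with None placeholders.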
        self.ret_trainee = RetrieverTrainer(model, None, None, None, None,
                                            None)

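        # Restore the trained weights on CPU and keep only the retriever for inference.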
        model_ckpt = torch.load(ckpt_to_resume,
                                map_location=torch.device("cpu"))
        self.ret_trainee.load_state_dict(model_ckpt["state_dict"])
        self.model = self.ret_trainee.retriever
        self.source2embedded_passages = {}
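
The constructor reads only two keys, ckpt_to_resume and tokenizer_name, from the
YAML config before handing the rest to load_model. A minimal sketch of such a
config, parsed the same way (all values are placeholders, and load_model will
likely require further keys such as model that this excerpt does not show):

import yaml

# Minimal config sketch for the constructor above; values are placeholders.
config_text = """
ckpt_to_resume: output/best.ckpt
tokenizer_name: bert-base-uncased
"""
hyper_params = yaml.load(config_text, Loader=yaml.FullLoader)
assert {"ckpt_to_resume", "tokenizer_name"} <= hyper_params.keys()
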
Code Example #2
def init_model(hyper_params, num_workers, output, validation_interval, gpu,
               no_model_restoring, debug):

    check_and_log_hp([
        'train_file', 'dev_files', 'test_file', 'batch_size', 'tokenizer_name',
        'model', 'max_question_len', 'max_paragraph_len', 'patience',
        'gradient_clipping', 'max_epochs', 'loss_type', 'optimizer',
        'precision', 'accumulate_grad_batches', 'seed'
    ], hyper_params)

    if hyper_params['seed'] is not None:
        # fix the seed
        torch.manual_seed(hyper_params['seed'])
        np.random.seed(hyper_params['seed'])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    tokenizer_name = hyper_params['tokenizer_name']
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    ret = load_model(hyper_params, tokenizer, debug)

    os.makedirs(output, exist_ok=True)
    checkpoint_callback = ModelCheckpoint(filepath=os.path.join(
        output, '{epoch}-{val_acc_0:.2f}-{val_loss_0:.2f}'),
                                          save_top_k=1,
                                          verbose=True,
                                          monitor='val_acc_0',
                                          mode='max',
                                          period=0)
    early_stopping = EarlyStopping('val_acc_0',
                                   mode='max',
                                   patience=hyper_params['patience'])

    if (hyper_params['model'].get('name') == 'bert_encoder'
            and hyper_params['model'].get('cache_size', 0) > 0):
        cbs = [CacheManagerCallback(ret, output)]
    else:
        cbs = []

    if hyper_params['precision'] not in {16, 32}:
        raise ValueError('precision should be either 16 or 32')
    if not no_model_restoring:
        ckpt_to_resume = try_to_restore_model_weights(output)
    else:
        ckpt_to_resume = None
        logger.info(
            'will not try to restore previous models because --no-model-restoring'
        )
    tb_logger = loggers.TensorBoardLogger('experiment_logs')
    for hparam in list(hyper_params):
        tb_logger.experiment.add_text(hparam, str(hyper_params[hparam]))

    trainer = pl.Trainer(
        logger=tb_logger,
        gpus=gpu,
        distributed_backend='dp',
        val_check_interval=validation_interval,
        min_epochs=1,
        gradient_clip_val=hyper_params['gradient_clipping'],
        checkpoint_callback=checkpoint_callback,
        early_stop_callback=early_stopping,
        callbacks=cbs,
        precision=hyper_params['precision'],
        resume_from_checkpoint=ckpt_to_resume,
        accumulate_grad_batches=hyper_params['accumulate_grad_batches'],
        max_epochs=hyper_params['max_epochs'])

    dev_dataloaders, test_dataloader, train_dataloader = get_data_loaders(
        hyper_params, num_workers, tokenizer)

    ret_trainee = RetrieverTrainer(ret, train_dataloader, dev_dataloaders,
                                   test_dataloader, hyper_params['loss_type'],
                                   hyper_params['optimizer'])
    return ckpt_to_resume, ret_trainee, trainer
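
The function only wires the pieces together; the caller drives the returned
triple. A minimal sketch of that call (hyper_params is the dict parsed from the
YAML config, and the argument values below are assumptions; the fit/test
dispatch mirrors the end of Code Example #4):

# Sketch: consume the triple returned by init_model; values are assumptions.
ckpt_to_resume, ret_trainee, trainer = init_model(
    hyper_params,             # dict parsed from the YAML config
    num_workers=2,
    output='output',
    validation_interval=0.5,
    gpu=0,
    no_model_restoring=False,
    debug=False)
trainer.fit(ret_trainee)      # or trainer.test(ret_trainee) to evaluate on dev
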
Code Example #3
File: main.py Project: kiminh/bert_reranker
def init_model(
    hyper_params,
    num_workers,
    output,
    validation_interval,
    gpu,
    no_model_restoring,
    debug,
    print_sentence_stats
):

    check_and_log_hp(
        [
            "train_file",
            "dev_files",
            "test_file",
            "batch_size",
            "tokenizer_name",
            "model",
            "max_question_len",
            "max_paragraph_len",
            "patience",
            "gradient_clipping",
            "max_epochs",
            "loss_type",
            "optimizer",
            "precision",
            "accumulate_grad_batches",
            "seed",
            "logging",
            "keep_ood"
        ],
        hyper_params,
    )

    if hyper_params["seed"] is not None:
        # fix the seed
        torch.manual_seed(hyper_params["seed"])
        np.random.seed(hyper_params["seed"])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    tokenizer_name = hyper_params["tokenizer_name"]
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    ret = load_model(hyper_params, tokenizer, debug)

    os.makedirs(output, exist_ok=True)
    checkpoint_callback = ModelCheckpoint(
        filepath=os.path.join(output, "{epoch}-{val_acc_0:.2f}-{val_loss_0:.2f}"),
        save_top_k=1,
        verbose=True,
        monitor="val_acc_0",
        mode="max",
        period=0,
    )
    early_stopping = EarlyStopping(
        "val_acc_0", mode="max", patience=hyper_params["patience"]
    )

    if (
        hyper_params["model"].get("name") == "bert_encoder"
        and hyper_params["model"].get("cache_size", 0) > 0
    ):
        cbs = [CacheManagerCallback(ret, output)]
    else:
        cbs = []

    if hyper_params["precision"] not in {16, 32}:
        raise ValueError("precision should be either 16 or 32")
    if not no_model_restoring:
        ckpt_to_resume = try_to_restore_model_weights(output)
    else:
        ckpt_to_resume = None
        logger.info(
            "will not try to restore previous models because --no-model-restoring"
        )
    if hyper_params["logging"]["logger"] == "tensorboard":
        pl_logger = loggers.TensorBoardLogger("experiment_logs")
        for hparam in list(hyper_params):
            pl_logger.experiment.add_text(hparam, str(hyper_params[hparam]))
    elif hyper_params["logging"]["logger"] == "wandb":
        orion_trial_id = os.environ.get('ORION_TRIAL_ID')
        name = orion_trial_id if orion_trial_id else hyper_params["logging"]["name"]
        pl_logger = WandbLogger(
            name=name,
            project=hyper_params["logging"]["project"],
            group=hyper_params["logging"]["group"],
        )
        pl_logger.log_hyperparams(hyper_params)
    else:
        raise ValueError(
            "logger {} is not implemented".format(hyper_params["logging"]["logger"])
        )

    trainer = pl.Trainer(
        logger=pl_logger,
        gpus=gpu,
        distributed_backend="dp",
        val_check_interval=validation_interval,
        min_epochs=1,
        gradient_clip_val=hyper_params["gradient_clipping"],
        checkpoint_callback=checkpoint_callback,
        early_stop_callback=early_stopping,
        callbacks=cbs,
        precision=hyper_params["precision"],
        resume_from_checkpoint=ckpt_to_resume,
        accumulate_grad_batches=hyper_params["accumulate_grad_batches"],
        max_epochs=hyper_params["max_epochs"],
    )

    dev_dataloaders, test_dataloader, train_dataloader = get_data_loaders(
        hyper_params, num_workers, tokenizer
    )

    if print_sentence_stats:
        evaluate_tokenizer_cutoff(
            hyper_params["train_file"],
            tokenizer,
            hyper_params["max_question_len"],
            hyper_params["max_paragraph_len"],
        )

    ret_trainee = RetrieverTrainer(
        ret,
        train_dataloader,
        dev_dataloaders,
        test_dataloader,
        hyper_params["loss_type"],
        hyper_params["optimizer"],
    )
    return ckpt_to_resume, ret_trainee, trainer
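
Compared to Code Example #2, this variant also dispatches on a logging block in
the config. A sketch of the two shapes it accepts, written as the Python dicts
yaml.load would produce (every value except the logger key is a placeholder):

# TensorBoard variant: only the "logger" key is consulted.
logging_tensorboard = {"logger": "tensorboard"}

# wandb variant: "name" is overridden by $ORION_TRIAL_ID when that env var is set.
logging_wandb = {
    "logger": "wandb",
    "name": "baseline-run",       # placeholder
    "project": "bert_reranker",   # placeholder
    "group": "dev",               # placeholder
}
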
Code Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config',
        help='config file with generic hyper-parameters, such as optimizer, '
        'batch_size, ... - in yaml format',
        required=True)
    parser.add_argument(
        '--gpu',
        help='list of gpu ids to use. default is cpu. example: --gpu 0 1',
        type=int,
        nargs='+',
        default=0)
    parser.add_argument('--validation-interval',
                        help='how often to run validation within one epoch - '
                        'e.g., 0.5 means every half epoch - default 0.5',
                        type=float,
                        default=0.5)
    parser.add_argument('--output',
                        help='where to store models',
                        required=True)
    parser.add_argument('--no-model-restoring',
                        help='will not restore any previous model weights ('
                        'even if present)',
                        action='store_true')
    parser.add_argument('--train',
                        help='will train the model',
                        action='store_true')
    parser.add_argument('--validate',
                        help='will not train - will just evaluate on dev',
                        action='store_true')
    parser.add_argument(
        '--predict',
        help='will predict on the json file you provide as an arg')
    parser.add_argument('--predict-to',
                        help='(optional) write predictions here')
    parser.add_argument('--redirect-log',
                        help='will intercept any stdout/err and log it',
                        action='store_true')
    parser.add_argument('--debug',
                        help='will log more info',
                        action='store_true')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    if args.redirect_log:
        sys.stdout = LoggerWriter(logger.info)
        sys.stderr = LoggerWriter(logger.warning)

    with open(args.config, 'r') as stream:
        hyper_params = load(stream, Loader=yaml.FullLoader)

    check_and_log_hp([
        'train_file', 'dev_files', 'test_file', 'cache_folder', 'batch_size',
        'tokenizer_name', 'model', 'max_question_len', 'max_paragraph_len',
        'patience', 'gradient_clipping', 'max_epochs', 'loss_type',
        'optimizer', 'precision', 'accumulate_grad_batches', 'seed'
    ], hyper_params)

    if hyper_params['seed'] is not None:
        # fix the seed
        torch.manual_seed(hyper_params['seed'])
        np.random.seed(hyper_params['seed'])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    os.makedirs(hyper_params['cache_folder'], exist_ok=True)

    tokenizer_name = hyper_params['tokenizer_name']
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    train_dataloader = generate_dataloader(hyper_params['train_file'],
                                           hyper_params['cache_folder'],
                                           hyper_params['max_question_len'],
                                           hyper_params['max_paragraph_len'],
                                           tokenizer,
                                           hyper_params['batch_size'])

    dev_dataloaders = []
    for dev_file in hyper_params['dev_files'].values():
        dev_dataloaders.append(
            generate_dataloader(dev_file, hyper_params['cache_folder'],
                                hyper_params['max_question_len'],
                                hyper_params['max_paragraph_len'], tokenizer,
                                hyper_params['batch_size']))

    test_dataloader = generate_dataloader(hyper_params['test_file'],
                                          hyper_params['cache_folder'],
                                          hyper_params['max_question_len'],
                                          hyper_params['max_paragraph_len'],
                                          tokenizer,
                                          hyper_params['batch_size'])

    ret = load_model(hyper_params, tokenizer, args.debug)

    os.makedirs(args.output, exist_ok=True)
    checkpoint_callback = ModelCheckpoint(filepath=os.path.join(
        args.output, '{epoch}-{val_acc_0:.2f}-{val_loss_0:.2f}'),
                                          save_top_k=1,
                                          verbose=True,
                                          monitor='val_acc_0',
                                          mode='max')

    early_stopping = EarlyStopping('val_acc_0',
                                   mode='max',
                                   patience=hyper_params['patience'])

    if hyper_params['precision'] not in {16, 32}:
        raise ValueError('precision should be either 16 or 32')

    if not args.no_model_restoring:
        ckpt_to_resume = try_to_restore_model_weights(args.output)
    else:
        ckpt_to_resume = None
        logger.info(
            'will not try to restore previous models because --no-model-restoring'
        )

    tb_logger = loggers.TensorBoardLogger('experiment_logs')
    for hparam in list(hyper_params):
        tb_logger.experiment.add_text(hparam, str(hyper_params[hparam]))

    trainer = pl.Trainer(
        logger=tb_logger,
        gpus=args.gpu,
        distributed_backend='dp',
        val_check_interval=args.validation_interval,
        min_epochs=1,
        gradient_clip_val=hyper_params['gradient_clipping'],
        checkpoint_callback=checkpoint_callback,
        early_stop_callback=early_stopping,
        precision=hyper_params['precision'],
        resume_from_checkpoint=ckpt_to_resume,
        accumulate_grad_batches=hyper_params['accumulate_grad_batches'],
        max_epochs=hyper_params['max_epochs'])

    ret_trainee = RetrieverTrainer(ret, train_dataloader, dev_dataloaders,
                                   test_dataloader, hyper_params['loss_type'],
                                   hyper_params['optimizer'])

    if args.train:
        trainer.fit(ret_trainee)
    elif args.validate:
        trainer.test(ret_trainee)
    elif args.predict:
        model_ckpt = torch.load(ckpt_to_resume,
                                map_location=torch.device("cpu"))
        ret_trainee.load_state_dict(model_ckpt["state_dict"])
        evaluate_model(ret_trainee,
                       qa_pairs_json_file=args.predict,
                       predict_to=args.predict_to)
    else:
        logger.warning(
            'please select one between --train / --validate / --predict')
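
The excerpt stops at the dispatch; presumably the module ends with the usual
entry-point guard so the script can be run directly:

if __name__ == '__main__':
    main()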