Example #1
def _train_eval(diff_vec_class, desc_vec_class, alp):
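    """Run slice_test_main with the given diff/desc vectorizer classes and
    alpha, evaluate the resulting prediction file, and return the run
    suffix together with the evaluation results."""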
    metrics = "accuracy,recall,distance,nlg"
    suf = "{}_{}_{}".format(_get_class_name(diff_vec_class),
                            _get_class_name(desc_vec_class), alp)
    out_file = os.path.join(os.path.dirname(valid_file),
                            "nnupdater_result_{}.json".format(suf))
    slice_test_main(train_file,
                    valid_file,
                    out_file,
                    alpha=alp,
                    slice_count=slice_count,
                    diff_vec_class=diff_vec_class,
                    desc_vec_class=desc_vec_class)
    evaluator = Evaluator(args={
        "--metrics": metrics,
        "TEST_SET": valid_file,
        "RESULT_FILE": out_file
    })
    r, lemma_r = evaluator.evaluate()
    return suf, r, lemma_r
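
A minimal usage sketch for _train_eval; the vectorizer classes and the alpha grid below are assumptions, not part of the original:

import itertools

# Hypothetical sweep: TfIdfVectorizer and BagOfWordsVectorizer are placeholder
# names -- substitute the project's real diff/desc vectorizer classes.
results = {}
for diff_cls, desc_cls in itertools.product(
        [TfIdfVectorizer, BagOfWordsVectorizer], repeat=2):
    for alp in (0.25, 0.5, 0.75):
        suf, r, lemma_r = _train_eval(diff_cls, desc_cls, alp)
        results[suf] = (r, lemma_r)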
Example #2
def main():
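    """Set up the output directory, device, and logging, load the model and
    tokenizer, then run training and/or evaluation as requested."""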
    args = parse_args()

    transformers_logger = logging.getLogger("transformers")
    transformers_logger.setLevel(logging.ERROR)

    if args.predict_file is None and args.do_eval:
        raise ValueError(
            "Cannot do evaluation without an evaluation data file. Either supply a file to --predict_file "
            "or remove the --do_eval argument.")

    if args.output_dir and os.path.exists(args.output_dir) and \
            os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overwrite it."
            .format(args.output_dir))

    if args.overwrite_output_dir and os.path.isdir(args.output_dir):
        shutil.rmtree(args.output_dir)
    os.makedirs(args.output_dir, exist_ok=True)

    # Setup CUDA, GPU & distributed training
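    # local_rank == -1 means single-process (non-distributed) training.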
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initialize the distributed backend, which takes care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl')
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)

    with open(os.path.join(args.output_dir, 'args.txt'), 'w') as f:
        f.write(str(args))

    for key, val in vars(args).items():
        logger.info(f"{key} - {val}")

    try:
        write_meta_data(args.output_dir, args)
    except git.exc.InvalidGitRepositoryError:
        logger.info("didn't save metadata - No git repo!")

    logger.info(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, amp training: %s",
        args.local_rank, device, args.n_gpu, bool(args.local_rank != -1),
        args.amp)

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Barrier to make sure only the first process in distributed training downloads the model & vocab
        torch.distributed.barrier()

    if args.config_name:
        config = AutoConfig.from_pretrained(args.config_name,
                                            cache_dir=args.cache_dir)
    elif args.model_name_or_path:
        config = AutoConfig.from_pretrained(args.model_name_or_path,
                                            cache_dir=args.cache_dir)
    else:
        config = CONFIG_MAPPING[args.model_type]()
        logger.warning(
            "You are instantiating a new config instance from scratch.")
    # config.hidden_dropout_prob = args.encoder_dropout_prob

    if args.tokenizer_name:
        tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name,
                                                  cache_dir=args.cache_dir)
    elif args.model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path,
                                                  cache_dir=args.cache_dir)
    else:
        raise ValueError(
            "You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from "
            "another script, save it, and load it from here, using --tokenizer_name")

    config_class = LongformerConfig
    base_model_prefix = "longformer"

    S2E.config_class = config_class
    S2E.base_model_prefix = base_model_prefix
    model = S2E.from_pretrained(args.model_name_or_path,
                                config=config,
                                cache_dir=args.cache_dir,
                                args=args)

    model.to(args.device)

    if args.local_rank == 0:
        # End of barrier to make sure only the first process in distributed training downloads the model & vocab
        torch.distributed.barrier()

    logger.info("Training/evaluation parameters %s", args)

    evaluator = Evaluator(args, tokenizer)
    # Training
    if args.do_train:
        train_dataset = get_dataset(args, tokenizer, evaluate=False)

        global_step, tr_loss = train(args, train_dataset, model, tokenizer,
                                     evaluator)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Saving best-practices: if you use save_pretrained for the model and tokenizer,
    # you can reload them using from_pretrained()
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        # Take care of distributed/parallel training.
        model_to_save = model.module if hasattr(model, 'module') else model
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))

    # Evaluation
    results = {}

    if args.do_eval and args.local_rank in [-1, 0]:
        result = evaluator.evaluate(model,
                                    prefix="final_evaluation",
                                    official=True)
        results.update(result)

    return results
Example #3
def test(args):
    """Run model testing."""
    test_args = args.test_args
    logger_args = args.logger_args

    # Load the model at ckpt_path.
    ckpt_path = test_args.ckpt_path
    ckpt_save_dir = Path(ckpt_path).parent

    # Get model args from checkpoint and add them to
    # command-line specified model args.
    (model_args, data_args, optim_args,
     logger_args) = ModelSaver.get_args(cl_logger_args=logger_args,
                                        ckpt_save_dir=ckpt_save_dir)

    model, ckpt_info = ModelSaver.load_model(ckpt_path=ckpt_path,
                                             gpu_ids=args.gpu_ids,
                                             model_args=model_args,
                                             is_training=False)

    # Get logger.
    logger = Logger(logger_args=logger_args,
                    data_args=data_args,
                    optim_args=optim_args,
                    test_args=test_args)

    # Instantiate the Predictor class for obtaining model predictions.
    predictor = Predictor(model=model, device=args.device)

    phase = test_args.phase
    is_test = False
    if phase == 'test':
        is_test = True
        phase = 'valid'  # Run valid first to get threshold

    print(f"======================{phase}=======================")
    # Get phase loader object.
    loader = get_loader(phase=phase,
                        data_args=data_args,
                        is_training=False,
                        logger=logger)
    # Obtain model predictions.
    predictions, groundtruth = predictor.predict(loader)

    # Instantiate the evaluator class for evaluating models.
    evaluator = Evaluator(logger=logger, tune_threshold=True)

    # Get model metrics and curves on the phase dataset.
    metrics = evaluator.evaluate(groundtruth, predictions)

    # Log metrics to stdout and file.
    logger.log_stdout(f"Writing metrics to {logger.metrics_path}.")
    logger.log_metrics(metrics, phase=phase)

    # Evaluate the dense predictions to obtain the dense thresholds.
    dense_loader = get_loader(phase=phase,
                              data_args=data_args,
                              is_training=False,
                              logger=logger)
    dense_predictions, dense_groundtruth = predictor.predict(dense_loader)
    dense_metrics = evaluator.dense_evaluate(dense_groundtruth,
                                             dense_predictions)

    # Log metrics to stdout and file.
    logger.log_stdout(f"Writing metrics to {logger.metrics_path}.")
    logger.log_metrics(dense_metrics, phase=phase)

    if is_test:
        phase = 'test'
        threshold = metrics['threshold']
        print(f"======================{phase}=======================")

        # Get phase loader object.
        loader = get_loader(phase=phase,
                            data_args=data_args,
                            is_training=False,
                            test_args=test_args,
                            logger=logger)
        # Obtain model predictions.
        predictions, groundtruth = predictor.predict(loader)

        # Instantiate the evaluator class for evaluating models.
        evaluator = Evaluator(logger=logger,
                              threshold=threshold,
                              tune_threshold=False)

        # Get model metrics and curves on the phase dataset.
        metrics = evaluator.evaluate(groundtruth, predictions)
        # Log metrics to stdout and file.
        logger.log_stdout(f"Writing metrics to {logger.metrics_path}.")
        logger.log_metrics(metrics, phase=phase)

        # Dense test
        phase = 'dense_test'
        dense_loader = get_loader(phase=phase,
                                  data_args=data_args,
                                  is_training=False,
                                  test_args=test_args,
                                  logger=logger)
        threshold_dense = dense_metrics["threshold_dense"]
        threshold_tunef1_dense = dense_metrics["threshold_tunef1_dense"]
        dense_predictions, dense_groundtruth = predictor.predict(dense_loader)
        dense_metrics = evaluator.dense_evaluate(
            dense_groundtruth,
            dense_predictions,
            threshold=threshold_dense,
            threshold_tunef1=threshold_tunef1_dense)
        logger.log_stdout(f"Writing metrics to {logger.metrics_path}.")
        logger.log_metrics(dense_metrics, phase=phase)
Example #4
def main(args):
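    """Seed the RNGs, build the data loaders and the CNN-RNN model, then
    either evaluate a saved checkpoint or run the training loop."""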
    # 1. Initial setup.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)  # Seed all GPUs as well.
    cudnn.benchmark = True  # Slightly faster training at essentially no extra cost.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # 2. Logging: redirect stdout to a log file.
    if args.evaluate == 1:
        sys.stdout = Logger(osp.join(args.logs_dir, 'log_test.txt'))
    else:
        sys.stdout = Logger(osp.join(args.logs_dir, 'log_train.txt'))

    # 3. Dataset and loaders.
    dataset, numclasses, train_loader, query_loader, gallery_loader = getdata(
        args.dataset, args.split, args.batch_size, args.seq_len, args.seq_srd,
        args.workers)

    # 4. Build the network.
    cnn_rnn_model = models.creat(args.a1,
                                 16,
                                 32,
                                 32,
                                 numclasses,
                                 num_features=args.features,
                                 seq_len=args.seq_len,
                                 batch=args.batch_size).to(device)
    cnn_rnn_model.initialize_weights()
    # param_groups = [
    # 	{'params': cnn_rnn_model.parameters(), 'lr_mult': 1},]

    criterion = Criterion(args.hingeMargin).to(device)
    optimizer = optim.SGD(cnn_rnn_model.parameters(),
                          lr=args.lr1,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # def adjust_lr(epoch):
    # 	if epoch <= 20:
    # 		lr = 0.01
    # 	elif 20 < epoch <= 200:
    # 		lr = 0.001
    # 	else:
    # 		lr = 0.0001
    # 	for g in optimizer.param_groups:
    # 		g['lr'] = lr * g.get('lr_mult', 1)

    # 5. Instantiate the trainer.
    trainer = SEQTrainer(cnn_rnn_model, criterion)

    # 6. Instantiate the evaluator.
    evaluator = Evaluator(cnn_rnn_model)
    best_top1 = 0

    # 7. Enter training or evaluation mode.
    if args.evaluate == 1:
        checkpoint = load_checkpoint(
            osp.join(args.logs_dir, 'model_best.pth.tar'))
        cnn_rnn_model.load_state_dict(checkpoint['state_dict'])
        rank1 = evaluator.evaluate(query_loader, gallery_loader,
                                   dataset.queryinfo, dataset.galleryinfo)
    else:
        cnn_rnn_model.train()
        for epoch in range(args.start_epoch, args.epochs):
            # adjust_lr(epoch)
            trainer.train(epoch, train_loader, optimizer)

            # Evaluate every few epochs.
            if (epoch + 1) % 20 == 0 or (epoch + 1) == args.epochs:

                top1 = evaluator.evaluate(query_loader, gallery_loader,
                                          dataset.queryinfo,
                                          dataset.galleryinfo)
                is_best = top1 > best_top1
                if is_best:
                    best_top1 = top1

                save_checkpoint(
                    {
                        'state_dict': cnn_rnn_model.state_dict(),
                        'epoch': epoch + 1,
                        'best_top1': best_top1,
                    },
                    is_best,
                    fpath=osp.join(args.logs_dir, 'cnn_checkpoint.pth.tar'))
Example #5
def train(args):
    """Run model training."""

    # Get nested namespaces.
    model_args = args.model_args
    logger_args = args.logger_args
    optim_args = args.optim_args
    data_args = args.data_args

    # Get logger.
    logger = Logger(logger_args)

    if model_args.ckpt_path:
        # CL-specified args are used to load the model, rather than the
        # ones saved to args.json.
        model_args.pretrained = False
        ckpt_path = model_args.ckpt_path
        model, ckpt_info = ModelSaver.load_model(ckpt_path=ckpt_path,
                                                 gpu_ids=args.gpu_ids,
                                                 model_args=model_args,
                                                 is_training=True)
        optim_args.start_epoch = ckpt_info['epoch'] + 1
    else:
        # If no ckpt_path is provided, instantiate a new randomly
        # initialized model.
        model_fn = models.__dict__[model_args.model]
        model = model_fn(model_args)
        model = nn.DataParallel(model, args.gpu_ids)
    # Put model on gpu or cpu and put into training mode.
    model = model.to(args.device)
    model.train()

    # Get train and valid loader objects.
    train_loader = get_loader(phase="train",
                              data_args=data_args,
                              is_training=True,
                              logger=logger)
    valid_loader = get_loader(phase="valid",
                              data_args=data_args,
                              is_training=False,
                              logger=logger)
    dense_valid_loader = get_loader(phase="dense_valid",
                                    data_args=data_args,
                                    is_training=False,
                                    logger=logger)

    # Instantiate the predictor class for obtaining model predictions.
    predictor = Predictor(model, args.device)

    # Instantiate the evaluator class for evaluating models.
    # By default, get best performance on validation set.
    evaluator = Evaluator(logger=logger, tune_threshold=True)

    # Instantiate the saver class for saving model checkpoints.
    saver = ModelSaver(save_dir=logger_args.save_dir,
                       iters_per_save=logger_args.iters_per_save,
                       max_ckpts=logger_args.max_ckpts,
                       metric_name=optim_args.metric_name,
                       maximize_metric=optim_args.maximize_metric,
                       keep_topk=True,
                       logger=logger)

    # Instantiate the optimizer class for guiding model training.
    optimizer = Optimizer(parameters=model.parameters(),
                          optim_args=optim_args,
                          batch_size=data_args.batch_size,
                          iters_per_print=logger_args.iters_per_print,
                          iters_per_visual=logger_args.iters_per_visual,
                          iters_per_eval=logger_args.iters_per_eval,
                          dataset_len=len(train_loader.dataset),
                          logger=logger)
    if model_args.ckpt_path:
        # Load the same optimizer as used in the original training.
        optimizer.load_optimizer(ckpt_path=model_args.ckpt_path,
                                 gpu_ids=args.gpu_ids)

    loss_fn = evaluator.get_loss_fn(loss_fn_name=optim_args.loss_fn)

    # Run training
    while not optimizer.is_finished_training():
        optimizer.start_epoch()

        for inputs, targets in train_loader:
            optimizer.start_iter()

            if optimizer.global_step % optimizer.iters_per_eval == 0:
                # Only evaluate every iters_per_eval steps.
                predictions, groundtruth = predictor.predict(valid_loader)
                metrics = evaluator.evaluate(groundtruth, predictions)

                # Evaluate on dense dataset
                dense_predictions, dense_groundtruth = predictor.predict(
                    dense_valid_loader)
                dense_metrics = evaluator.dense_evaluate(
                    dense_groundtruth, dense_predictions)
                # Merge the metrics dicts together
                metrics = {**metrics, **dense_metrics}

                # Log metrics to stdout.
                logger.log_metrics(metrics, phase='valid')

                # Log to tb
                logger.log_scalars(metrics,
                                   optimizer.global_step,
                                   phase='valid')

                if optimizer.global_step % logger_args.iters_per_save == 0:
                    # Only save every iters_per_save steps, directly
                    # after evaluation.
                    saver.save(iteration=optimizer.global_step,
                               epoch=optimizer.epoch,
                               model=model,
                               optimizer=optimizer,
                               device=args.device,
                               metric_val=metrics[optim_args.metric_name])

                # Step learning rate scheduler.
                optimizer.step_scheduler(metrics[optim_args.metric_name])

            with torch.set_grad_enabled(True):

                # Run the minibatch through the model.
                logits = model(inputs.to(args.device))

                # Compute the minibatch loss.
                loss = loss_fn(logits, targets.to(args.device))

                # Log the data from this iteration.
                optimizer.log_iter(inputs, logits, targets, loss)

                # Perform a backward pass.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            optimizer.end_iter()

        optimizer.end_epoch(metrics)

    # Save the most recent model.
    saver.save(iteration=optimizer.global_step,
               epoch=optimizer.epoch,
               model=model,
               optimizer=optimizer,
               device=args.device,
               metric_val=metrics[optim_args.metric_name])
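
A minimal sketch of an entry point for train; the ArgParser helper that builds the nested namespaces (model_args, logger_args, optim_args, data_args) is an assumption, not part of the original:

# Hypothetical entry point; ArgParser is a placeholder for whatever parser
# produces the nested namespaces that train() expects.
if __name__ == '__main__':
    parser = ArgParser()
    train(parser.parse_args())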