def main(args):
    """Entry point: optionally train a student (under an optional teacher), then evaluate both.

    Reads a YAML config, builds datasets/models, runs training unless
    --test_only, reloads the best student checkpoint, and evaluates the
    teacher (unless --student_only) and student on the test split.
    """
    log_path = args.log
    # Only the main process writes the log file.
    if is_main_process() and log_path is not None:
        setup_log_file(os.path.expanduser(log_path))

    distributed, device_ids = init_distributed_mode(args.world_size,
                                                    args.dist_url)
    logger.info(args)
    cudnn.benchmark = True
    set_seed(args.seed)
    config = yaml_util.load_yaml_file(os.path.expanduser(args.config))
    device = torch.device(args.device)
    dataset_dict = util.get_all_datasets(config['datasets'])
    models_config = config['models']

    # The teacher is optional: skip it when the config has no 'teacher_model' entry.
    teacher_model_config = models_config.get('teacher_model', None)
    if teacher_model_config is None:
        teacher_model = None
    else:
        teacher_model = load_model(teacher_model_config, device)

    # Fall back to the legacy 'model' key when 'student_model' is absent.
    if 'student_model' in models_config:
        student_model_config = models_config['student_model']
    else:
        student_model_config = models_config['model']
    ckpt_file_path = student_model_config['ckpt']
    student_model = load_model(student_model_config, device)
    if args.log_config:
        logger.info(config)

    if not args.test_only:
        train(teacher_model, student_model, dataset_dict, ckpt_file_path,
              device, device_ids, distributed, config, args)
        # Unwrap DDP (if wrapped) before reloading the best checkpoint.
        student_model_without_ddp = student_model
        if module_util.check_if_wrapped(student_model):
            student_model_without_ddp = student_model.module
        load_ckpt(student_model_config['ckpt'],
                  model=student_model_without_ddp,
                  strict=True)

    test_config = config['test']
    test_data_loader_config = test_config['test_data_loader']
    test_data_loader = util.build_data_loader(
        dataset_dict[test_data_loader_config['dataset_id']],
        test_data_loader_config, distributed)
    log_freq = test_config.get('log_freq', 1000)
    iou_types = args.iou_types
    if not args.student_only and teacher_model is not None:
        evaluate(teacher_model,
                 test_data_loader,
                 iou_types,
                 device,
                 device_ids,
                 distributed,
                 log_freq=log_freq,
                 title='[Teacher: {}]'.format(teacher_model_config['name']))
    evaluate(student_model,
             test_data_loader,
             iou_types,
             device,
             device_ids,
             distributed,
             log_freq=log_freq,
             title='[Student: {}]'.format(student_model_config['name']))
def main(args):
    """Entry point: optionally distill a student from the teacher, then evaluate both.

    Loads config/datasets/models, runs distillation unless --test_only,
    reloads the best student checkpoint, and evaluates teacher (unless
    --student_only) and student on the test split.
    """
    log_path = args.log
    # Log-file setup happens on the main process only.
    if is_main_process() and log_path is not None:
        setup_log_file(os.path.expanduser(log_path))

    distributed, device_ids = init_distributed_mode(args.world_size,
                                                    args.dist_url)
    logger.info(args)
    cudnn.benchmark = True
    config = yaml_util.load_yaml_file(os.path.expanduser(args.config))
    device = torch.device(args.device)
    dataset_dict = util.get_all_dataset(config['datasets'])
    models_config = config['models']
    teacher_model_config = models_config['teacher_model']
    teacher_model = get_model(teacher_model_config, device)
    student_model_config = models_config['student_model']
    student_model = get_model(student_model_config, device)

    if not args.test_only:
        distill(teacher_model, student_model, dataset_dict, device, device_ids,
                distributed, config, args)
        # Unwrap DDP (if wrapped) before reloading the best checkpoint.
        if module_util.check_if_wrapped(student_model):
            student_model_without_ddp = student_model.module
        else:
            student_model_without_ddp = student_model
        load_ckpt(student_model_config['ckpt'],
                  model=student_model_without_ddp,
                  strict=True)

    test_config = config['test']
    test_data_loader_config = test_config['test_data_loader']
    test_data_loader = util.build_data_loader(
        dataset_dict[test_data_loader_config['dataset_id']],
        test_data_loader_config, distributed)
    num_classes = args.num_classes
    if not args.student_only:
        evaluate(teacher_model,
                 test_data_loader,
                 device,
                 device_ids,
                 distributed,
                 num_classes=num_classes,
                 title='[Teacher: {}]'.format(teacher_model_config['name']))
    evaluate(student_model,
             test_data_loader,
             device,
             device_ids,
             distributed,
             num_classes=num_classes,
             title='[Student: {}]'.format(student_model_config['name']))
# Example #3
def predict_private(model, dataset_dict, label_names_dict, is_regression,
                    accelerator, private_configs, private_output_dir_path):
    """Run inference on each configured private dataset and write TSV predictions.

    Args:
        model: trained (accelerator-prepared) model used for inference.
        dataset_dict: maps dataset_id -> dataset object.
        label_names_dict: maps task_name -> list of label names (index -> name).
        is_regression: True for regression tasks (raw values, no argmax / idx2str).
        accelerator: Accelerate object used to prepare each data loader.
        private_configs: iterable of per-dataset config dicts, each with
            'private_data_loader' and 'pred_output' entries.
        private_output_dir_path: directory the prediction files are written under.
    """
    logger.info('Start prediction for private dataset(s)')
    model.eval()
    for private_config in private_configs:
        # Dataset
        private_data_loader_config = private_config['private_data_loader']
        private_dataset_id = private_data_loader_config['dataset_id']
        private_dataset = dataset_dict[private_dataset_id]
        label_names = label_names_dict[private_data_loader_config['task_name']]
        logger.info('{}: {} samples'.format(private_dataset_id,
                                            len(private_dataset)))

        # Dataloader
        private_data_loader = util.build_data_loader(
            private_dataset, private_data_loader_config, False)
        private_data_loader = accelerator.prepare(private_data_loader)

        # Prediction
        private_output_file_path = os.path.join(private_output_dir_path,
                                                private_config['pred_output'])
        file_util.make_parent_dirs(private_output_file_path)
        pred_batches = []
        # no_grad: inference only — skip autograd bookkeeping (saves memory/time).
        with torch.no_grad():
            for batch in private_data_loader:
                # Batches carry a 'labels' key; drop it before the forward pass
                # (private sets have no usable labels).
                batch.pop('labels')
                outputs = model(**batch)
                # squeeze(-1) rather than squeeze(): a final batch of size 1 keeps
                # its batch dimension instead of collapsing to a 0-d scalar.
                predictions = outputs.logits.squeeze(
                    -1) if is_regression else outputs.logits.argmax(dim=-1)
                pred_batches.append(predictions.detach().cpu().numpy())

        # Concatenate once instead of np.append per batch (avoids O(n^2) copies);
        # an empty loader yields an empty prediction column instead of crashing.
        np_preds = np.concatenate(pred_batches, axis=0) if pred_batches \
            else np.empty(0)
        df_output = pd.DataFrame({'prediction': np_preds})
        # Map prediction index to label name
        if not is_regression and private_config.get('idx2str', True):
            df_output.prediction = df_output.prediction.apply(
                lambda pred_idx: label_names[pred_idx])
        df_output.to_csv(private_output_file_path,
                         sep='\t',
                         index=True,
                         index_label='index')
# Example #4
def main(args):
    """Entry point for GLUE-style distillation with Hugging Face Accelerate.

    Sets up logging and the Accelerator, loads tokenizer/model pairs for an
    optional teacher and a student, optionally trains (distills) the student,
    evaluates teacher and best-checkpoint student on the test split, and —
    when configured — writes predictions for private dataset(s).
    """
    log_file_path = args.log
    # Only the main process writes the log file.
    if is_main_process() and log_file_path is not None:
        setup_log_file(os.path.expanduser(log_file_path))

    world_size = args.world_size
    logger.info(args)
    cudnn.benchmark = True
    set_seed(args.seed)
    config = yaml_util.load_yaml_file(os.path.expanduser(args.config))

    # Initialize the accelerator. We will let the accelerator handle device placement for us in this example.
    accelerator = Accelerator()
    distributed = accelerator.state.distributed_type == DistributedType.MULTI_GPU
    device_ids = [accelerator.device.index]
    if distributed:
        # Suppress printing on non-main processes in multi-GPU runs.
        setup_for_distributed(is_main_process())

    logger.info(accelerator.state)
    device = accelerator.device

    # Setup logging, we only want one process per machine to log things on the screen.
    # accelerator.is_local_main_process is only True for one process per machine.
    logger.setLevel(
        logging.INFO if accelerator.is_local_main_process else logging.ERROR)
    if accelerator.is_local_main_process:
        datasets.utils.logging.set_verbosity_warning()
        transformers.utils.logging.set_verbosity_info()
    else:
        datasets.utils.logging.set_verbosity_error()
        transformers.utils.logging.set_verbosity_error()

    # Load pretrained model and tokenizer
    task_name = args.task_name
    models_config = config['models']
    # Teacher is optional; when absent, plain (non-distillation) training applies.
    teacher_model_config = models_config.get('teacher_model', None)
    teacher_tokenizer, teacher_model = (None, None) if teacher_model_config is None \
        else load_tokenizer_and_model(teacher_model_config, task_name, True)
    # Fall back to the legacy 'model' key when 'student_model' is absent.
    student_model_config =\
        models_config['student_model'] if 'student_model' in models_config else models_config['model']
    student_tokenizer, student_model = load_tokenizer_and_model(
        student_model_config, task_name)
    ckpt_dir_path = student_model_config['ckpt']
    # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
    # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
    dataset_dict, label_names_dict, is_regression = \
        get_all_datasets(config['datasets'], task_name, student_tokenizer, student_model)

    # Update config with dataset size len(data_loader)
    customize_lr_config(config, dataset_dict, world_size)

    # register collate function
    # pad_to_multiple_of=8 targets fp16 tensor-core alignment when mixed precision is on.
    register_collate_func(
        DataCollatorWithPadding(
            student_tokenizer,
            pad_to_multiple_of=(8 if accelerator.use_fp16 else None)))

    # Get the metric function
    metric = get_metrics(task_name)

    if not args.test_only:
        train(teacher_model, student_model, dataset_dict, is_regression,
              ckpt_dir_path, metric, device, device_ids, distributed, config,
              args, accelerator)
        # Save the tokenizer next to the checkpoint so the pair can be reloaded together.
        student_tokenizer.save_pretrained(ckpt_dir_path)

    test_config = config['test']
    test_data_loader_config = test_config['test_data_loader']
    test_data_loader = util.build_data_loader(
        dataset_dict[test_data_loader_config['dataset_id']],
        test_data_loader_config, distributed)
    test_data_loader = accelerator.prepare(test_data_loader)
    if not args.student_only and teacher_model is not None:
        teacher_model = teacher_model.to(accelerator.device)
        evaluate(teacher_model,
                 test_data_loader,
                 metric,
                 is_regression,
                 accelerator,
                 title='[Teacher: {}]'.format(teacher_model_config['name']))

    # Reload best checkpoint based on validation result
    student_tokenizer, student_model = load_tokenizer_and_model(
        student_model_config, task_name, True)
    student_model = accelerator.prepare(student_model)
    evaluate(student_model,
             test_data_loader,
             metric,
             is_regression,
             accelerator,
             title='[Student: {}]'.format(student_model_config['name']))

    # Output prediction for private dataset(s) if both the config and output dir path are given
    private_configs = config.get('private', None)
    private_output_dir_path = args.private_output
    if private_configs is not None and private_output_dir_path is not None and is_main_process(
    ):
        predict_private(student_model, dataset_dict, label_names_dict,
                        is_regression, accelerator, private_configs,
                        private_output_dir_path)