Example #1
def main(config):
    # Get pretrained tokenizer.
    tokenizer = AutoTokenizer.from_pretrained(config.pretrained_model_name)
    # Build dataloaders from the raw (untokenized) corpus using the tokenizer.
    train_loader, valid_loader, index_to_label = get_loaders(
        config.train_fn, tokenizer)

    print(
        '|train| =',
        len(train_loader) * config.batch_size,
        '|valid| =',
        len(valid_loader) * config.batch_size,
    )

    # Get pretrained model with specified softmax layer.
    model = AutoModelForSequenceClassification.from_pretrained(
        config.pretrained_model_name, num_labels=len(index_to_label))
    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay': 0.01
    }, {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay': 0.0
    }]

    optimizer = optim.AdamW(optimizer_grouped_parameters,
                            lr=config.lr,
                            eps=config.adam_epsilon)
    # The model outputs raw logits (no softmax is applied inside the model),
    # so we use CrossEntropyLoss, which applies log-softmax internally,
    # rather than NLLLoss.
    crit = nn.CrossEntropyLoss()

    n_total_iterations = len(train_loader) * config.n_epochs
    n_warmup_steps = int(n_total_iterations * config.warmup_ratio)
    scheduler = get_linear_schedule_with_warmup(optimizer, n_warmup_steps,
                                                n_total_iterations)

    if config.gpu_id >= 0:
        model.cuda(config.gpu_id)
        crit.cuda(config.gpu_id)

    # Start training.
    trainer = Trainer(config)
    model = trainer.train(
        model,
        crit,
        optimizer,
        scheduler,
        train_loader,
        valid_loader,
    )

    torch.save(
        {
            'rnn': None,
            'cnn': None,
            'bert': model.state_dict(),
            'config': config,
            'vocab': None,
            'classes': index_to_label,
            'tokenizer': tokenizer,
        }, config.model_fn)
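
The checkpoint saved above bundles the fine-tuned weights, the label mapping, the tokenizer and the config in one file; below is a minimal reload-and-predict sketch under that assumption (the file name and the sample sentence are placeholders, not part of the original script).

import torch
from transformers import AutoModelForSequenceClassification

# Reload the checkpoint layout used above ('bert', 'classes', 'tokenizer', 'config').
saved = torch.load("model.pth", map_location="cpu")
config, tokenizer = saved["config"], saved["tokenizer"]
index_to_label = saved["classes"]

model = AutoModelForSequenceClassification.from_pretrained(
    config.pretrained_model_name, num_labels=len(index_to_label))
model.load_state_dict(saved["bert"])
model.eval()

batch = tokenizer(["This is a sample sentence."], return_tensors="pt")
with torch.no_grad():
    pred = model(**batch).logits.argmax(dim=-1)
print(index_to_label[pred.item()])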
Example #2
def main(args):
    device = torch.device(args.device)
    if args.seed is not None:
        random_seed(args.seed)

    tokenizer = AutoTokenizer.from_pretrained(args.transformers_path)
    non_modified_data = json.load(Path(args.non_modified_data).open('r'))

    result = dict()
    paths = Path(args.data_folder).glob('*.json')
    for path in paths:
        print('=' * 50)
        # File names are expected to look like "<method>_<beats_per_token>.json".
        method, bpt = path.stem.split('_')
        bpt = int(bpt)
        print(f"method: {method}, beats_per_token: {bpt}")
        if bpt not in result.keys():
            result[bpt] = dict()
        result[bpt][method] = {
            'acc': list(),
            'roc_auc': list(),
            'f1score': list()
        }

        for j in range(args.n_splits):
            print(f"{j + 1} SPLIT OUT OF {args.n_splits}")
            seed = random.randint(0, 10_000_000)  # randint needs integer bounds

            modified_data = json.load(Path(path).open('r'))
            train_data, test_data = get_train_test_data(
                random.sample(non_modified_data,
                              args.non_modified_data_sample_size),
                modified_data,
                test_size=args.test_size,
                random_state=seed)
            transform = CustomTransform(tokenizer, max_len=100)
            train_dataset = CustomDataset(train_data[0],
                                          train_data[1],
                                          transform=transform)
            test_dataset = CustomDataset(test_data[0],
                                         test_data[1],
                                         transform=transform)
            batcher = {
                'train':
                DataLoader(train_dataset,
                           batch_size=args.batch_size,
                           shuffle=True),
                'dev':
                DataLoader(test_dataset, batch_size=args.batch_size)
            }

            config = AutoConfig.from_pretrained(args.transformers_path,
                                                num_labels=2)
            model = AutoModelForSequenceClassification.from_pretrained(
                args.transformers_path, config=config).to(device)

            train(model, batcher, args)
            checkpoint = torch.load(args.checkpoint_path)
            model.load_state_dict(checkpoint['model_state_dict'])
            current_res = evaluate(model, batcher, args)
            result[bpt][method]['acc'].append(current_res['acc'])
            result[bpt][method]['roc_auc'].append(current_res['roc_auc'])
            result[bpt][method]['f1score'].append(current_res['f1score'])

            with Path(args.result_path).open('w') as f:
                json.dump(result, f)
            del model, checkpoint, train_data, test_data, train_dataset, test_dataset, batcher
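
Each metric list above ends up with one entry per split, nested under beats_per_token and method; a small sketch of summarising that results file into mean and spread (the file name is a placeholder):

import json
import statistics
from pathlib import Path

# result[bpt][method][metric] is a list with one value per split.
result = json.load(Path("results.json").open())
for bpt, methods in result.items():
    for method, metrics in methods.items():
        for name, values in metrics.items():
            mean = statistics.mean(values)
            spread = statistics.pstdev(values) if len(values) > 1 else 0.0
            print(f"bpt={bpt} {method} {name}: {mean:.3f} +/- {spread:.3f}")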
Example #3
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses(
        )

    if (os.path.exists(training_args.output_dir)
            and os.listdir(training_args.output_dir) and training_args.do_train
            and not training_args.overwrite_output_dir):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
            "Use --overwrite_output_dir to overcome.")

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO
        if is_main_process(training_args.local_rank) else logging.WARN,
    )

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
        transformers.utils.logging.enable_default_handler()
        transformers.utils.logging.enable_explicit_format()
    logger.info(f"Training/evaluation parameters {training_args}")

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
    # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
    #
    # For CSV/JSON files, this script will use as labels the column called 'label' and as pair of sentences the
    # sentences in columns called 'sentence1' and 'sentence2' if such column exists or the first two columns not named
    # label if at least two columns are provided.
    #
    # If the CSVs/JSONs contain only one non-label column, the script does single sentence classification on this
    # single column. You can easily tweak this behavior (see below)
    #
    # In distributed training, the load_dataset function guarantee that only one local process can concurrently
    # download the dataset.
    if data_args.task_name is not None:
        # Downloading and loading a dataset from the hub.
        datasets = load_dataset("glue", data_args.task_name)
    elif data_args.train_file.endswith(".csv"):
        # Loading a dataset from local csv files
        datasets = load_dataset("csv",
                                data_files={
                                    "train": data_args.train_file,
                                    "validation": data_args.validation_file
                                })
    else:
        # Loading a dataset from local json files
        datasets = load_dataset("json",
                                data_files={
                                    "train": data_args.train_file,
                                    "validation": data_args.validation_file
                                })
    # See more about loading any type of standard or custom dataset at
    # https://huggingface.co/docs/datasets/loading_datasets.html.

    # Labels
    if data_args.task_name is not None:
        is_regression = data_args.task_name == "stsb"
        if not is_regression:
            label_list = datasets["train"].features["label"].names
            num_labels = len(label_list)
        else:
            num_labels = 1
    else:
        # Trying to have good defaults here, don't hesitate to tweak to your needs.
        is_regression = datasets["train"].features["label"].dtype in [
            "float32", "float64"
        ]
        if is_regression:
            num_labels = 1
        else:
            # A useful fast method:
            # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique
            label_list = datasets["train"].unique("label")
            label_list.sort()  # Let's sort it for determinism
            num_labels = len(label_list)

    # Load pretrained model and tokenizer
    #
    # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config = AutoConfig.from_pretrained(
        model_args.config_name
        if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name
        if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    # Preprocessing the datasets
    if data_args.task_name is not None:
        sentence1_key, sentence2_key = task_to_keys[data_args.task_name]
    else:
        # Again, we try to have some nice defaults but don't hesitate to tweak to your use case.
        non_label_column_names = [
            name for name in datasets["train"].column_names if name != "label"
        ]
        if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names:
            sentence1_key, sentence2_key = "sentence1", "sentence2"
        else:
            if len(non_label_column_names) >= 2:
                sentence1_key, sentence2_key = non_label_column_names[:2]
            else:
                sentence1_key, sentence2_key = non_label_column_names[0], None

    # Padding strategy
    if data_args.pad_to_max_length:
        padding = "max_length"
        max_length = data_args.max_seq_length
    else:
        # We will pad later, dynamically at batch creation, to the max sequence length in each batch
        padding = False
        max_length = None

    # Some models have set the order of the labels to use, so let's make sure we do use it.
    label_to_id = None
    if (model.config.label2id !=
            PretrainedConfig(num_labels=num_labels).label2id
            and data_args.task_name is not None and not is_regression):
        # Some have all caps in their config, some don't.
        label_name_to_id = {
            k.lower(): v
            for k, v in model.config.label2id.items()
        }
        if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
            label_to_id = {
                i: label_name_to_id[label_list[i]]
                for i in range(num_labels)
            }
        else:
            logger.warning(
                "Your model seems to have been trained with labels, but they don't match the dataset: "
                f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
                "\nIgnoring the model labels as a result."
            )
    elif data_args.task_name is None and not is_regression:
        label_to_id = {v: i for i, v in enumerate(label_list)}

    def preprocess_function(examples):
        # Tokenize the texts
        args = ((examples[sentence1_key], ) if sentence2_key is None else
                (examples[sentence1_key], examples[sentence2_key]))
        result = tokenizer(*args,
                           padding=padding,
                           max_length=max_length,
                           truncation=True)

        # Map labels to IDs (not necessary for GLUE tasks)
        if label_to_id is not None and "label" in examples:
            result["label"] = [label_to_id[l] for l in examples["label"]]
        return result

    datasets = datasets.map(preprocess_function,
                            batched=True,
                            load_from_cache_file=not data_args.overwrite_cache)

    train_dataset = datasets["train"]
    eval_dataset = datasets[
        "validation_matched" if data_args.task_name == "mnli" else "validation"]
    if data_args.task_name is not None:
        test_dataset = datasets[
            "test_matched" if data_args.task_name == "mnli" else "test"]

    # Log a few random samples from the training set:
    for index in random.sample(range(len(train_dataset)), 3):
        logger.info(
            f"Sample {index} of the training set: {train_dataset[index]}.")

    # Get the metric function
    if data_args.task_name is not None:
        metric = load_metric("glue", data_args.task_name)
    # TODO: When datasets metrics include regular accuracy, make an else here and remove special branch from
    # compute_metrics

    # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
    # predictions and label_ids field) and has to return a dictionary string to float.
    def compute_metrics(p: EvalPrediction):
        preds = p.predictions[0] if isinstance(p.predictions,
                                               tuple) else p.predictions
        preds = np.squeeze(preds) if is_regression else np.argmax(preds,
                                                                  axis=1)
        if data_args.task_name is not None:
            result = metric.compute(predictions=preds, references=p.label_ids)
            if len(result) > 1:
                result["combined_score"] = np.mean(list(
                    result.values())).item()
            return result
        elif is_regression:
            return {"mse": ((preds - p.label_ids)**2).mean().item()}
        else:
            return {
                "accuracy":
                (preds == p.label_ids).astype(np.float32).mean().item()
            }

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset if training_args.do_eval else None,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding.
        data_collator=default_data_collator
        if data_args.pad_to_max_length else None,
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(
            model_path=model_args.model_name_or_path
            if os.path.isdir(model_args.model_name_or_path) else None)
        metrics = train_result.metrics

        trainer.save_model()  # Saves the tokenizer too for easy upload

        output_train_file = os.path.join(training_args.output_dir,
                                         "train_results.txt")
        if trainer.is_world_process_zero():
            with open(output_train_file, "w") as writer:
                logger.info("***** Train results *****")
                for key, value in sorted(metrics.items()):
                    logger.info(f"  {key} = {value}")
                    writer.write(f"{key} = {value}\n")

            # Need to save the state, since Trainer.save_model saves only the tokenizer with the model
            trainer.state.save_to_json(
                os.path.join(training_args.output_dir, "trainer_state.json"))

    # Evaluation
    eval_results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        tasks = [data_args.task_name]
        eval_datasets = [eval_dataset]
        if data_args.task_name == "mnli":
            tasks.append("mnli-mm")
            eval_datasets.append(datasets["validation_mismatched"])

        for eval_dataset, task in zip(eval_datasets, tasks):
            eval_result = trainer.evaluate(eval_dataset=eval_dataset)

            output_eval_file = os.path.join(training_args.output_dir,
                                            f"eval_results_{task}.txt")
            if trainer.is_world_process_zero():
                with open(output_eval_file, "w") as writer:
                    logger.info(f"***** Eval results {task} *****")
                    for key, value in sorted(eval_result.items()):
                        logger.info(f"  {key} = {value}")
                        writer.write(f"{key} = {value}\n")

            eval_results.update(eval_result)

    if training_args.do_predict:
        logger.info("*** Test ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        tasks = [data_args.task_name]
        test_datasets = [test_dataset]
        if data_args.task_name == "mnli":
            tasks.append("mnli-mm")
            test_datasets.append(datasets["test_mismatched"])

        for test_dataset, task in zip(test_datasets, tasks):
            # Removing the `label` columns because it contains -1 and Trainer won't like that.
            test_dataset.remove_columns_("label")
            predictions = trainer.predict(
                test_dataset=test_dataset).predictions
            predictions = np.squeeze(
                predictions) if is_regression else np.argmax(predictions,
                                                             axis=1)

            output_test_file = os.path.join(training_args.output_dir,
                                            f"test_results_{task}.txt")
            if trainer.is_world_process_zero():
                with open(output_test_file, "w") as writer:
                    logger.info(f"***** Test results {task} *****")
                    writer.write("index\tprediction\n")
                    for index, item in enumerate(predictions):
                        if is_regression:
                            writer.write(f"{index}\t{item:3.3f}\n")
                        else:
                            item = label_list[item]
                            writer.write(f"{index}\t{item}\n")
    return eval_results
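
As the comments in the script above note, when no GLUE task is given it expects a 'label' column plus one or two sentence columns; a toy sketch of writing CSV files in that layout (the file names and sentence values are invented for illustration):

import csv

rows = [
    {"sentence1": "A man is playing guitar.", "sentence2": "Someone makes music.", "label": "entailment"},
    {"sentence1": "A man is playing guitar.", "sentence2": "Nobody is around.", "label": "contradiction"},
]
for split in ("train", "validation"):
    with open(f"{split}.csv", "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=["sentence1", "sentence2", "label"])
        writer.writeheader()
        writer.writerows(rows)
# These files could then be passed as --train_file train.csv --validation_file validation.csv.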
Example #4
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Setup distant debugging if needed
    if data_args.server_ip and data_args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(data_args.server_ip,
                                     data_args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )

    # Set the verbosity to info of the Transformers logger (on main process only):
    if training_args.should_log:
        transformers.utils.logging.set_verbosity_info()
        transformers.utils.logging.enable_default_handler()
        transformers.utils.logging.enable_explicit_format()
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(
            training_args.output_dir
    ) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(
                training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome.")
        elif last_checkpoint is not None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # In distributed training, the load_dataset function guarantees that only one local process can concurrently
    # download the dataset.
    # Downloading and loading xnli dataset from the hub.
    if training_args.do_train:
        if model_args.train_language is None:
            train_dataset = load_dataset("xnli",
                                         model_args.language,
                                         split="train",
                                         cache_dir=model_args.cache_dir)
        else:
            train_dataset = load_dataset("xnli",
                                         model_args.train_language,
                                         split="train",
                                         cache_dir=model_args.cache_dir)
        label_list = train_dataset.features["label"].names

    if training_args.do_eval:
        eval_dataset = load_dataset("xnli",
                                    model_args.language,
                                    split="validation",
                                    cache_dir=model_args.cache_dir)
        label_list = eval_dataset.features["label"].names

    if training_args.do_predict:
        predict_dataset = load_dataset("xnli",
                                       model_args.language,
                                       split="test",
                                       cache_dir=model_args.cache_dir)
        label_list = predict_dataset.features["label"].names

    # Labels
    num_labels = len(label_list)

    # Load pretrained model and tokenizer
    # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config = AutoConfig.from_pretrained(
        model_args.config_name
        if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task="xnli",
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name
        if model_args.tokenizer_name else model_args.model_name_or_path,
        do_lower_case=model_args.do_lower_case,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    # Preprocessing the datasets
    # Padding strategy
    if data_args.pad_to_max_length:
        padding = "max_length"
    else:
        # We will pad later, dynamically at batch creation, to the max sequence length in each batch
        padding = False

    def preprocess_function(examples):
        # Tokenize the texts
        return tokenizer(
            examples["premise"],
            examples["hypothesis"],
            padding=padding,
            max_length=data_args.max_seq_length,
            truncation=True,
        )

    if training_args.do_train:
        if data_args.max_train_samples is not None:
            train_dataset = train_dataset.select(
                range(data_args.max_train_samples))
        train_dataset = train_dataset.map(
            preprocess_function,
            batched=True,
            load_from_cache_file=not data_args.overwrite_cache,
            desc="Running tokenizer on train dataset",
        )
        # Log a few random samples from the training set:
        for index in random.sample(range(len(train_dataset)), 3):
            logger.info(
                f"Sample {index} of the training set: {train_dataset[index]}.")

    if training_args.do_eval:
        if data_args.max_eval_samples is not None:
            eval_dataset = eval_dataset.select(
                range(data_args.max_eval_samples))
        eval_dataset = eval_dataset.map(
            preprocess_function,
            batched=True,
            load_from_cache_file=not data_args.overwrite_cache,
            desc="Running tokenizer on validation dataset",
        )

    if training_args.do_predict:
        if data_args.max_predict_samples is not None:
            predict_dataset = predict_dataset.select(
                range(data_args.max_predict_samples))
        predict_dataset = predict_dataset.map(
            preprocess_function,
            batched=True,
            load_from_cache_file=not data_args.overwrite_cache,
            desc="Running tokenizer on prediction dataset",
        )

    # Get the metric function
    metric = load_metric("xnli")

    # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
    # predictions and label_ids field) and has to return a dictionary string to float.
    def compute_metrics(p: EvalPrediction):
        preds = p.predictions[0] if isinstance(p.predictions,
                                               tuple) else p.predictions
        preds = np.argmax(preds, axis=1)
        return metric.compute(predictions=preds, references=p.label_ids)

    # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding.
    if data_args.pad_to_max_length:
        data_collator = default_data_collator
    elif training_args.fp16:
        data_collator = DataCollatorWithPadding(tokenizer,
                                                pad_to_multiple_of=8)
    else:
        data_collator = None

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset if training_args.do_train else None,
        eval_dataset=eval_dataset if training_args.do_eval else None,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        metrics = train_result.metrics
        max_train_samples = (data_args.max_train_samples
                             if data_args.max_train_samples is not None else
                             len(train_dataset))
        metrics["train_samples"] = min(max_train_samples, len(train_dataset))

        trainer.save_model()  # Saves the tokenizer too for easy upload

        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
        metrics = trainer.evaluate(eval_dataset=eval_dataset)

        max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(
            eval_dataset)
        metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Prediction
    if training_args.do_predict:
        logger.info("*** Predict ***")
        predictions, labels, metrics = trainer.predict(
            predict_dataset, metric_key_prefix="predict")

        max_predict_samples = (data_args.max_predict_samples
                               if data_args.max_predict_samples is not None
                               else len(predict_dataset))
        metrics["predict_samples"] = min(max_predict_samples,
                                         len(predict_dataset))

        trainer.log_metrics("predict", metrics)
        trainer.save_metrics("predict", metrics)

        predictions = np.argmax(predictions, axis=1)
        output_predict_file = os.path.join(training_args.output_dir,
                                           "predictions.txt")
        if trainer.is_world_process_zero():
            with open(output_predict_file, "w") as writer:
                writer.write("index\tprediction\n")
                for index, item in enumerate(predictions):
                    item = label_list[item]
                    writer.write(f"{index}\t{item}\n")
Example #5
def main():
    try:
        from gpiozero import LED
        led = LED(12)
    except ImportError:
        print('GPIO Not Found')

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-l',
        '--local',
        action='store_true',
        help='Start in local mode given you have a tweet server')
    parser.add_argument('--host',
                        type=str,
                        help='Hostname of the tweet server',
                        default='localhost')
    parser.add_argument('-p',
                        '--port',
                        type=str,
                        help='Port of the tweet server',
                        default='5000')
    args = parser.parse_args()
    local = args.local
    host = args.host
    port = args.port

    print("Loading model...")
    model = AutoModelForSequenceClassification.from_pretrained(
        'finetuned_model')
    tokenizer = AutoTokenizer.from_pretrained('finetuned_model')
    print(model)
    print('Model Loaded!')

    if local:
        while True:
            screen_clear()
            r = requests.get(f'http://{host}:{port}').json()
            pred, text, masked, time_elapsed = classify_text(
                model, tokenizer, r.get('text'))
            print_centre(text)
            if pred == 1:
                task = threading.Thread(target=alert)
                task.start()

            time.sleep(10)

    else:
        headers = initialize_stream_header()
        with requests.get(
                "https://api.twitter.com/2/tweets/search/stream",
                headers=headers,
                stream=True,
        ) as response:
            sys.stdout.flush()
            if response.status_code != 200:
                raise Exception("Cannot get stream (HTTP {}): {}".format(
                    response.status_code, response.text))
            for response_line in response.iter_lines():
                if response_line:
                    screen_clear()
                    json_response = json.loads(response_line)
                    pred, text, masked, time_elapsed = classify_text(
                        model, tokenizer, json_response['data']['text'])
                    print_centre(text)
                    if pred == 1:
                        task = threading.Thread(target=alert)
                        task.start()
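
classify_text() is project-specific and not shown above; the following is only a guess at its shape, assuming it tokenizes the text, runs the classifier, and returns the predicted class, the raw text, a masked copy, and the elapsed time.

import time
import torch

def classify_text(model, tokenizer, text, max_length=128):
    # Hypothetical helper: tokenize, forward pass, argmax, and timing.
    start = time.time()
    batch = tokenizer(text, truncation=True, max_length=max_length,
                      return_tensors="pt")
    with torch.no_grad():
        pred = model(**batch).logits.argmax(dim=-1).item()
    masked = text if pred == 0 else "*" * len(text)  # placeholder masking rule
    return pred, text, masked, time.time() - start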
Example #6
def train_discriminator(
    run_name: str,
    model_path: str,
    config_file: str,
    train_file: str,
    train_fraq: float,
    dataset_type: str,
    output_model_path: str,
):
    logging.set_verbosity_info()
    config = json.loads(jsonnet_evaluate_file(config_file))
    init_wandb(run_name, config)

    agency_list = config['agency_list']
    print('Agency list:', agency_list)

    max_tokens_text = config["max_tokens_text"]
    max_tokens_title = config["max_tokens_title"]

    tokenizer_model_path = config["tokenizer_model_path"]
    tokenizer = BertTokenizer.from_pretrained(tokenizer_model_path,
                                              do_lower_case=False,
                                              do_basic_tokenize=False)

    print("Fetching data...")
    if dataset_type == 'tg':
        all_records = [
            r for r in tqdm.tqdm(tg_reader(train_file, agency_list))
        ]
        full_dataset = AgencyTitleDatasetClassification(
            all_records,
            tokenizer,
            agency_list,
            max_tokens_text=max_tokens_text,
            max_tokens_title=max_tokens_title)
    elif dataset_type == 'lenta-ria':
        lenta_records = [
            r for r in tqdm.tqdm(
                lenta_reader(
                    os.path.join(train_file, 'lenta/lenta-ru-news.train.csv')))
        ]
        lenta_records.extend([
            r for r in tqdm.tqdm(
                lenta_reader(
                    os.path.join(train_file, 'lenta/lenta-ru-news.val.csv')))
        ])

        ria_records = [
            r for r in tqdm.tqdm(
                ria_reader(
                    os.path.join(train_file, 'ria/ria.shuffled.train.json')))
        ]
        ria_records.extend([
            r for r in tqdm.tqdm(
                ria_reader(
                    os.path.join(train_file, 'ria/ria.shuffled.val.json')))
        ])

        records = [
            r for r in reader(
                '/home/aobuhtijarov/datasets/full_lenta_ria.test.jsonl')
        ]

        filter_lenta = [{
            'text': r['lenta_text'],
            'title': r['lenta_title'],
            'agency': 'lenta.ru',
            'date': r['lenta_date']
        } for r in records]

        filter_ria = [{
            'text': r['ria_text'],
            'title': r['ria_title'],
            'agency': 'РИА Новости',
            'date': r['lenta_date']
        } for r in records]

        lenta_filter_titles = set(x['title'] for x in filter_lenta)
        ria_filter_titles = set(x['title'] for x in filter_ria)
        lenta_records = [
            r for r in lenta_records if r['title'] not in lenta_filter_titles
        ]
        ria_records = [
            r for r in ria_records if r['title'] not in ria_filter_titles
        ]

        random.shuffle(ria_records)
        lenta_records = [
            r for r in lenta_records
            if r['date'][:4] in ['2010', '2011', '2012', '2013', '2014']
        ]

        all_records = lenta_records + ria_records[:len(lenta_records)]

        random.shuffle(all_records)
        full_dataset = AgencyTitleDatasetClassification(
            all_records,
            tokenizer,
            agency_list,
            max_tokens_text=max_tokens_text,
            max_tokens_title=max_tokens_title)
    elif dataset_type == 'lenta-ria-clusters':
        full_dataset = LentaRiaDatasetClassification(train_file, tokenizer,
                                                     agency_list,
                                                     max_tokens_text,
                                                     max_tokens_title)

    print("Building datasets...")

    train_size = int(train_fraq * len(full_dataset))
    test_size = int((1 - train_fraq) * 0.5 * len(full_dataset))

    train_dataset, test_dataset, eval_dataset = \
        torch.utils.data.random_split(full_dataset, [train_size, test_size, len(full_dataset) - train_size - test_size])

    wandb.summary.update({
        'Train dataset size': len(train_dataset),
        'Val dataset size': len(eval_dataset),
        'Test dataset size': len(test_dataset),
    })

    print("Initializing model...")
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path, num_labels=len(agency_list))

    print("Training model...")
    batch_size = config["batch_size"]
    logging_steps = config["logging_steps"]
    save_steps = config["save_steps"]
    eval_steps = config["eval_steps"]
    warmup_steps = config["num_warmup_steps"]
    gradient_accumulation_steps = config["gradient_accumulation_steps"]
    max_steps = config["max_steps"]
    lr = config["learning_rate"]

    training_args = TrainingArguments(
        output_dir=output_model_path,
        do_train=True,
        do_eval=True,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        evaluation_strategy='steps',
        learning_rate=lr,
        warmup_steps=warmup_steps,
        overwrite_output_dir=False,
        logging_steps=logging_steps,
        eval_steps=eval_steps,
        save_steps=save_steps,
        max_steps=max_steps,
        save_total_limit=1,
        weight_decay=0.01,
        report_to='wandb',
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    wandb.summary.update(
        {'Test Evaluation': trainer.evaluate(eval_dataset=test_dataset)})
    model.save_pretrained(output_model_path)
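
compute_metrics is referenced above but defined elsewhere in that project; a plausible sketch for a multi-class agency classifier (an assumption, not the original implementation):

import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(eval_pred):
    # eval_pred is a transformers EvalPrediction with .predictions and .label_ids.
    preds = np.argmax(eval_pred.predictions, axis=-1)
    labels = eval_pred.label_ids
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1_macro": f1_score(labels, preds, average="macro"),
    }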
Example #7
def load_from_config(self):
    setattr(self.config, 'num_labels', self.num_labels)
    self.transformer = AutoModelForSequenceClassification.from_config(
        self.config)
def main():
    parser = HfArgumentParser(
        (DataTrainingArguments, TeacherModelArguments, StudentModelArguments,
         DistillTrainingArguments),
        description=DESCRIPTION,
    )

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        data_args, teacher_args, student_args, training_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1]))
    else:
        data_args, teacher_args, student_args, training_args = parser.parse_args_into_dataclasses(
        )

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(
            training_args.output_dir
    ) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(
                training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome.")
        elif last_checkpoint is not None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    logger.setLevel(
        logging.INFO if is_main_process(training_args.local_rank)
        else logging.WARN)

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        utils.logging.set_verbosity_info()
        utils.logging.enable_default_handler()
        utils.logging.enable_explicit_format()

    if training_args.local_rank != -1:
        raise ValueError("Distributed training is not currently supported.")
    if training_args.tpu_num_cores is not None:
        raise ValueError("TPU acceleration is not currently supported.")

    logger.info(f"Training/evaluation parameters {training_args}")

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # 1. read in data
    examples = read_lines(data_args.data_file)
    class_names = read_lines(data_args.class_names_file)

    # 2. get teacher predictions and load into dataset
    logger.info("Generating predictions from zero-shot teacher model")
    teacher_soft_preds = get_teacher_predictions(
        teacher_args.teacher_name_or_path,
        examples,
        class_names,
        teacher_args.hypothesis_template,
        teacher_args.teacher_batch_size,
        teacher_args.temperature,
        teacher_args.multi_class,
        data_args.use_fast_tokenizer,
        training_args.no_cuda,
        training_args.fp16,
    )
    dataset = Dataset.from_dict({
        "text": examples,
        "labels": teacher_soft_preds,
    })

    # 3. create student
    logger.info("Initializing student model")
    model = AutoModelForSequenceClassification.from_pretrained(
        student_args.student_name_or_path, num_labels=len(class_names))
    tokenizer = AutoTokenizer.from_pretrained(
        student_args.student_name_or_path,
        use_fast=data_args.use_fast_tokenizer)
    model.config.id2label = {i: label for i, label in enumerate(class_names)}
    model.config.label2id = {label: i for i, label in enumerate(class_names)}

    # 4. train student on teacher predictions
    dataset = dataset.map(tokenizer, input_columns="text")
    dataset.set_format("torch")

    def compute_metrics(p, return_outputs=False):
        preds = p.predictions.argmax(-1)
        proxy_labels = p.label_ids.argmax(
            -1)  # "label_ids" are actually distributions
        return {"agreement": (preds == proxy_labels).mean().item()}

    trainer = DistillationTrainer(
        model=model,
        tokenizer=tokenizer,
        args=training_args,
        train_dataset=dataset,
        compute_metrics=compute_metrics,
    )

    if training_args.do_train:
        logger.info("Training student model on teacher predictions")
        trainer.train()

    if training_args.do_eval:
        agreement = trainer.evaluate(eval_dataset=dataset)["eval_agreement"]
        logger.info(
            f"Agreement of student and teacher predictions: {agreement * 100:0.2f}%"
        )

    trainer.save_model()
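
DistillationTrainer comes from the same project and is not shown here; given that the comment in compute_metrics above says the "labels" are teacher probability distributions, its loss plausibly looks something like this sketch (not the original class):

import torch.nn.functional as F
from transformers import Trainer

class DistillationTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        # Soft cross-entropy between the teacher distribution (passed in as
        # "labels") and the student's log-softmax over the same classes.
        soft_targets = inputs.pop("labels")
        outputs = model(**inputs)
        log_probs = F.log_softmax(outputs.logits, dim=-1)
        loss = -(soft_targets * log_probs).sum(dim=-1).mean()
        return (loss, outputs) if return_outputs else loss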
        return max(len(d) for d in self.datasets)


trainset = FeverLabelPredictionDataset(args.train)
devset = FeverLabelPredictionDataset(args.dev)
if args.batch_size_unsup_ratio:
    unsupset = FeverLabelPredictionDataset_UDA(args.data_uda, is_aug=False)#[int(len(trainset)*.7):]
    augset = FeverLabelPredictionDataset_UDA(args.data_uda, is_aug=True)#[int(len(trainset)*.7):]
    assert len(unsupset) == len(augset)
    concatset = ConcatDataset(unsupset, augset)
    batch_size_unsup = int(args.batch_size_gpu * args.batch_size_unsup_ratio)


tokenizer = AutoTokenizer.from_pretrained(args.model)
config = AutoConfig.from_pretrained(args.model, num_labels=3)
model = AutoModelForSequenceClassification.from_pretrained(args.model, config=config).to(device)
optimizer = torch.optim.Adam([
    # If you are using non-roberta based models, change this to point to the right base
    {'params': model.roberta.parameters(), 'lr': args.lr_base},
    {'params': model.classifier.parameters(), 'lr': args.lr_linear}
])
scheduler = get_cosine_schedule_with_warmup(optimizer, 0, 20)


def encode(claims: List[str], rationale: List[str]):
    encoded_dict = tokenizer.batch_encode_plus(
        zip(rationale, claims),
        pad_to_max_length=True,
        return_tensors='pt')
    if encoded_dict['input_ids'].size(1) > 512:
        # Too long for the model. Truncate it
Example #10
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
import nlpaug.augmenter.char as nac
import json
import pandas as pd

# Load model
tokenizer = AutoTokenizer.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english")
inference_model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english")
model = pipeline("sentiment-analysis",
                 model=inference_model,
                 tokenizer=tokenizer)

# Define text perturbation
aug = nac.KeyboardAug(aug_word_max=1)  # Insert realistic keystroke errors


def typo(text):
    # Apply a simulated keyboard typo to the input text.
    return aug.augment(text)


def eval_perturb(input_a, input_b):
    output_a, output_b = model([input_a, input_b])
    sq_error = (output_a["score"] - output_b["score"])**2
    acc = output_a["label"] == output_b["label"]
    return (sq_error, acc, output_b["score"])


# Read in our test dataset
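
A quick illustration of the two helpers above on one invented sentence (newer nlpaug versions return a list from augment(), hence the isinstance check):

original = "This movie was absolutely wonderful."
perturbed = typo(original)
if isinstance(perturbed, list):  # newer nlpaug returns a list of outputs
    perturbed = perturbed[0]
sq_error, same_label, perturbed_score = eval_perturb(original, perturbed)
print(f"perturbed: {perturbed!r}, sq_error: {sq_error:.4f}, same label: {same_label}")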
Example #11
def train_func(config: Dict[str, Any]):
    # Accelerator reads from this environment variable for GPU placement.
    os.environ["LOCAL_RANK"] = str(ray.train.local_rank())
    os.environ["WORLD_SIZE"] = str(ray.train.world_size())

    args = config["args"]
    # Initialize the accelerator. We will let the accelerator handle device
    # placement for us in this example.
    accelerator = Accelerator(cpu=not args.use_gpu)
    # Make one log on every process with the configuration for debugging.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )
    logger.info(accelerator.state)

    # Setup logging, we only want one process per machine to log things on
    # the screen. accelerator.is_local_main_process is only True for one
    # process per machine.
    logger.setLevel(
        logging.INFO if accelerator.is_local_main_process else logging.ERROR)
    if accelerator.is_local_main_process:
        datasets.utils.logging.set_verbosity_warning()
        transformers.utils.logging.set_verbosity_info()
    else:
        datasets.utils.logging.set_verbosity_error()
        transformers.utils.logging.set_verbosity_error()

    # If passed along, set the training seed now.
    if args.seed is not None:
        set_seed(args.seed)

    # Get the datasets: you can either provide your own CSV/JSON training and
    # evaluation files (see below) or specify a GLUE benchmark task (the
    # dataset will be downloaded automatically from the datasets Hub).

    # For CSV/JSON files, this script will use as labels the column called
    # 'label' and as pair of sentences the sentences in columns called
    # 'sentence1' and 'sentence2' if such column exists or the first two
    # columns not named label if at least two columns are provided.

    # If the CSVs/JSONs contain only one non-label column, the script does
    # single sentence classification on this single column. You can easily
    # tweak this behavior (see below)

    # In distributed training, the load_dataset function guarantee that only
    # one local process can concurrently download the dataset.
    if args.task_name is not None:
        # Downloading and loading a dataset from the hub.
        raw_datasets = load_dataset("glue", args.task_name)
    else:
        # Loading the dataset from local csv or json file.
        data_files = {}
        if args.train_file is not None:
            data_files["train"] = args.train_file
        if args.validation_file is not None:
            data_files["validation"] = args.validation_file
        extension = (args.train_file if args.train_file is not None else
                     args.validation_file).split(".")[-1]
        raw_datasets = load_dataset(extension, data_files=data_files)
    # See more about loading any type of standard or custom dataset at
    # https://huggingface.co/docs/datasets/loading_datasets.html.

    # Labels
    if args.task_name is not None:
        is_regression = args.task_name == "stsb"
        if not is_regression:
            label_list = raw_datasets["train"].features["label"].names
            num_labels = len(label_list)
        else:
            num_labels = 1
    else:
        # Trying to have good defaults here, don't hesitate to tweak to your
        # needs.
        is_regression = raw_datasets["train"].features["label"].dtype in [
            "float32",
            "float64",
        ]
        if is_regression:
            num_labels = 1
        else:
            # A useful fast method:
            # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique # noqa:E501
            label_list = raw_datasets["train"].unique("label")
            label_list.sort()  # Let's sort it for determinism
            num_labels = len(label_list)

    # Load pretrained model and tokenizer
    #
    # In distributed training, the .from_pretrained methods guarantee that
    # only one local process can concurrently download model & vocab.
    config = AutoConfig.from_pretrained(args.model_name_or_path,
                                        num_labels=num_labels,
                                        finetuning_task=args.task_name)
    tokenizer = AutoTokenizer.from_pretrained(
        args.model_name_or_path, use_fast=not args.use_slow_tokenizer)
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
    )

    # Preprocessing the datasets
    if args.task_name is not None:
        sentence1_key, sentence2_key = task_to_keys[args.task_name]
    else:
        # Again, we try to have some nice defaults but don't hesitate to
        # tweak to your use case.
        non_label_column_names = [
            name for name in raw_datasets["train"].column_names
            if name != "label"
        ]
        if ("sentence1" in non_label_column_names
                and "sentence2" in non_label_column_names):
            sentence1_key, sentence2_key = "sentence1", "sentence2"
        else:
            if len(non_label_column_names) >= 2:
                sentence1_key, sentence2_key = non_label_column_names[:2]
            else:
                sentence1_key, sentence2_key = non_label_column_names[0], None

    # Some models have set the order of the labels to use,
    # so let's make sure we do use it.
    label_to_id = None
    if (model.config.label2id !=
            PretrainedConfig(num_labels=num_labels).label2id
            and args.task_name is not None and not is_regression):
        # Some have all caps in their config, some don't.
        label_name_to_id = {
            k.lower(): v
            for k, v in model.config.label2id.items()
        }
        if list(sorted(label_name_to_id.keys())) == list(  # noqa:C413
                sorted(label_list)):  # noqa:C413
            logger.info(
                f"The configuration of the model provided the following label "
                f"correspondence: {label_name_to_id}. Using it!")
            label_to_id = {
                i: label_name_to_id[label_list[i]]
                for i in range(num_labels)
            }
        else:
            logger.warning(
                "Your model seems to have been trained with labels, "
                "but they don't match the dataset: ",
                f"model labels: {list(sorted(label_name_to_id.keys()))}, "  # noqa:C413,E501
                f"dataset labels: {list(sorted(label_list))}."  # noqa:C413
                "\nIgnoring the model labels as a result.",
            )
    elif args.task_name is None:
        label_to_id = {v: i for i, v in enumerate(label_list)}

    if label_to_id is not None:
        model.config.label2id = label_to_id
        model.config.id2label = {
            id: label
            for label, id in config.label2id.items()
        }

    padding = "max_length" if args.pad_to_max_length else False

    def preprocess_function(examples):
        # Tokenize the texts
        texts = ((examples[sentence1_key], ) if sentence2_key is None else
                 (examples[sentence1_key], examples[sentence2_key]))
        result = tokenizer(*texts,
                           padding=padding,
                           max_length=args.max_length,
                           truncation=True)

        if "label" in examples:
            if label_to_id is not None:
                # Map labels to IDs (not necessary for GLUE tasks)
                result["labels"] = [
                    label_to_id[l] for l in examples["label"]  # noqa:E741
                ]
            else:
                # In all cases, rename the column to labels because the model
                # will expect that.
                result["labels"] = examples["label"]
        return result

    processed_datasets = raw_datasets.map(
        preprocess_function,
        batched=True,
        remove_columns=raw_datasets["train"].column_names,
        desc="Running tokenizer on dataset",
    )

    train_dataset = processed_datasets["train"]
    eval_dataset = processed_datasets[
        "validation_matched" if args.task_name == "mnli" else "validation"]

    # Log a few random samples from the training set:
    for index in random.sample(range(len(train_dataset)), 3):
        logger.info(
            f"Sample {index} of the training set: {train_dataset[index]}.")

    # DataLoaders creation:
    if args.pad_to_max_length:
        # If padding was already done ot max length, we use the default data
        # collator that will just convert everything to tensors.
        data_collator = default_data_collator
    else:
        # Otherwise, `DataCollatorWithPadding` will apply dynamic padding for
        # us (by padding to the maximum length of the samples passed). When
        # using mixed precision, we add `pad_to_multiple_of=8` to pad all
        # tensors to multiple of 8s, which will enable the use of Tensor
        # Cores on NVIDIA hardware with compute capability >= 7.5 (Volta).
        data_collator = DataCollatorWithPadding(
            tokenizer,
            pad_to_multiple_of=(8 if accelerator.use_fp16 else None))

    train_dataloader = DataLoader(
        train_dataset,
        shuffle=True,
        collate_fn=data_collator,
        batch_size=args.per_device_train_batch_size,
    )
    eval_dataloader = DataLoader(
        eval_dataset,
        collate_fn=data_collator,
        batch_size=args.per_device_eval_batch_size,
    )

    # Optimizer
    # Split weights in two groups, one with weight decay and the other not.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0,
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate)

    # Prepare everything with our `accelerator`.
    model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
        model, optimizer, train_dataloader, eval_dataloader)

    # Note -> the training dataloader needs to be prepared before we grab
    # its length below (because it will be shorter when training is
    # distributed over multiple processes).

    # Scheduler and math around the number of training steps.
    num_update_steps_per_epoch = math.ceil(
        len(train_dataloader) / args.gradient_accumulation_steps)
    if args.max_train_steps is None:
        args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
    else:
        args.num_train_epochs = math.ceil(args.max_train_steps /
                                          num_update_steps_per_epoch)

    lr_scheduler = get_scheduler(
        name=args.lr_scheduler_type,
        optimizer=optimizer,
        num_warmup_steps=args.num_warmup_steps,
        num_training_steps=args.max_train_steps,
    )

    # Get the metric function
    if args.task_name is not None:
        metric = load_metric("glue", args.task_name)
    else:
        metric = load_metric("accuracy")

    # Train!
    total_batch_size = (args.per_device_train_batch_size *
                        accelerator.num_processes *
                        args.gradient_accumulation_steps)

    logger.info("***** Running training *****")
    logger.info(f"  Num examples = {len(train_dataset)}")
    logger.info(f"  Num Epochs = {args.num_train_epochs}")
    logger.info(f"  Instantaneous batch size per device ="
                f" {args.per_device_train_batch_size}")
    logger.info(
        f"  Total train batch size (w. parallel, distributed & accumulation) "
        f"= {total_batch_size}")
    logger.info(
        f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}")
    logger.info(f"  Total optimization steps = {args.max_train_steps}")
    # Only show the progress bar once on each machine.
    progress_bar = tqdm(range(args.max_train_steps),
                        disable=not accelerator.is_local_main_process)
    completed_steps = 0

    for epoch in range(args.num_train_epochs):
        model.train()
        for step, batch in enumerate(train_dataloader):
            outputs = model(**batch)
            loss = outputs.loss
            loss = loss / args.gradient_accumulation_steps
            accelerator.backward(loss)
            if (step % args.gradient_accumulation_steps == 0
                    or step == len(train_dataloader) - 1):
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()
                progress_bar.update(1)
                completed_steps += 1

            if completed_steps >= args.max_train_steps:
                break

        model.eval()
        for step, batch in enumerate(eval_dataloader):
            outputs = model(**batch)
            predictions = (outputs.logits.argmax(
                dim=-1) if not is_regression else outputs.logits.squeeze())
            metric.add_batch(
                predictions=accelerator.gather(predictions),
                references=accelerator.gather(batch["labels"]),
            )

        eval_metric = metric.compute()
        logger.info(f"epoch {epoch}: {eval_metric}")

    if args.output_dir is not None:
        accelerator.wait_for_everyone()
        unwrapped_model = accelerator.unwrap_model(model)
        unwrapped_model.save_pretrained(args.output_dir,
                                        save_function=accelerator.save)

    if args.task_name == "mnli":
        # Final evaluation on mismatched validation set
        eval_dataset = processed_datasets["validation_mismatched"]
        eval_dataloader = DataLoader(
            eval_dataset,
            collate_fn=data_collator,
            batch_size=args.per_device_eval_batch_size,
        )
        eval_dataloader = accelerator.prepare(eval_dataloader)

        model.eval()
        for step, batch in enumerate(eval_dataloader):
            outputs = model(**batch)
            predictions = outputs.logits.argmax(dim=-1)
            metric.add_batch(
                predictions=accelerator.gather(predictions),
                references=accelerator.gather(batch["labels"]),
            )

        eval_metric = metric.compute()
        logger.info(f"mnli-mm: {eval_metric}")
Exemple #12
0
def main():
    parser = argparse.ArgumentParser()

    # for evaluating on paper. Specify the split (e.g. train/valid/test)
    parser.add_argument("--infer-paper", type=str, default=None)
    # for evaluating data (to generate contrib_indices)
    parser.add_argument("--infer-jsonl", type=str, default=None)
    # for evaluating system outputs
    parser.add_argument("--decode-type", type=str, default="beam")
    parser.add_argument(
        "--decode-results",
        type=Path,
        nargs="+",
        help="Paths to evaluation experiment directories.",
        default=None,
    )
    parser.add_argument(
        "--mode",
        type=str,
        choices=["contrib", "other"],
        help="Side to check purity scores.",
    )
    # Required parameters
    parser.add_argument("--logdir", type=str, required=True)
    parser.add_argument(
        "--data_dir",
        default=None,
        type=Path,
        required=True,
    )
    parser.add_argument(
        "--model_name_or_path",
        default="allenai/scibert_scivocab_cased",
        type=str,
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model predictions and checkpoints will be written.",
    )

    # Other parameters
    parser.add_argument(
        "--tokenizer_name",
        default="allenai/scibert_scivocab_cased",
        type=str,
    )
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
    )
    parser.add_argument("--do_train",
                        action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action="store_true",
                        help="Whether to run eval on the test set.")
    parser.add_argument(
        "--evaluate_during_training",
        action="store_true",
        help="Rul evaluation during training at each logging step.",
    )

    parser.add_argument(
        "--per_gpu_train_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for training.",
    )
    parser.add_argument(
        "--per_gpu_eval_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for evaluation.",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--learning_rate",
        default=5e-5,
        type=float,
        help="The initial learning rate for Adam.",
    )
    parser.add_argument("--weight_decay",
                        default=0.0,
                        type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument(
        "--num_train_epochs",
        default=5.0,
        type=float,
        help="Total number of training epochs to perform.",
    )
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help=
        "If > 0: set total number of training steps to perform. Override num_train_epochs.",
    )
    parser.add_argument("--warmup_steps",
                        default=0,
                        type=int,
                        help="Linear warmup over warmup_steps.")

    parser.add_argument("--logging_steps",
                        type=int,
                        default=500,
                        help="Log every X updates steps.")
    parser.add_argument(
        "--save_steps",
        type=int,
        default=500,
        help="Save checkpoint every X updates steps.",
    )
    parser.add_argument(
        "--eval_all_checkpoints",
        action="store_true",
        help=
        "Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number",
    )
    parser.add_argument("--no_cuda",
                        action="store_true",
                        help="Avoid using CUDA when available")
    parser.add_argument(
        "--overwrite_output_dir",
        action="store_true",
        help="Overwrite the content of the output directory",
    )
    parser.add_argument(
        "--overwrite_cache",
        action="store_true",
        help="Overwrite the cached training and evaluation sets",
    )
    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="random seed for initialization")

    parser.add_argument(
        "--fp16",
        action="store_true",
        help=
        "Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
    )
    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help=
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument(
        "--local_rank",
        type=int,
        default=-1,
        help="For distributed training: local_rank",
    )
    parser.add_argument("--server_ip",
                        type=str,
                        default="",
                        help="For distant debugging.")
    parser.add_argument("--server_port",
                        type=str,
                        default="",
                        help="For distant debugging.")
    args = parser.parse_args()

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    config = AutoConfig.from_pretrained(args.model_name_or_path)
    args.model_type = config.model_type
    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        config=config,
    )

    if args.local_rank == 0:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args,
                                                tokenizer,
                                                evaluate=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = (model.module if hasattr(model, "module") else model
                         )  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

        # Load a trained model and vocabulary that you have fine-tuned
        model = AutoModelForSequenceClassification.from_pretrained(
            args.output_dir)
        tokenizer = AutoTokenizer.from_pretrained(args.output_dir)
        model.to(args.device)

    # Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:

        if args.decode_results is not None:
            checkpoint = args.output_dir
            prefix = (checkpoint.split("/")[-1]
                      if checkpoint.find("checkpoint") != -1 else "")
            model = AutoModelForSequenceClassification.from_pretrained(
                checkpoint)
            model.to(args.device)
            inference_on_summary_outputs(args, model, tokenizer, prefix=prefix)

        elif args.infer_jsonl is not None:
            inference(args, model, tokenizer, prefix="")

        elif args.infer_paper is not None:
            inference_on_paper_text(args, model, tokenizer, prefix="")

        else:
            checkpoints = [args.output_dir]
            if args.eval_all_checkpoints:
                checkpoints = list(
                    os.path.dirname(c) for c in sorted(
                        glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME,
                                  recursive=True)))
                logging.getLogger("transformers.modeling_utils").setLevel(
                    logging.WARN)  # Reduce logging
            logger.info("Evaluate the following checkpoints: %s", checkpoints)
            for checkpoint in checkpoints:
                global_step = checkpoint.split(
                    "-")[-1] if len(checkpoints) > 1 else ""
                prefix = (checkpoint.split("/")[-1]
                          if checkpoint.find("checkpoint") != -1 else "")

                model = AutoModelForSequenceClassification.from_pretrained(
                    checkpoint)
                model.to(args.device)
                result = evaluate(args, model, tokenizer, prefix=prefix)
                result = dict((k + "_{}".format(global_step), v)
                              for k, v in result.items())
                results.update(result)

    return results
Exemple #13
0
def load_model():
    model = AutoModelForSequenceClassification.from_pretrained(
        model_checkpoint, num_labels=2)
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)
    return Trainer(model=model), tokenizer
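A hedged usage sketch for the helper above: the Trainer is created without TrainingArguments, so it falls back to defaults, and predictions can still be run on a small tokenized dataset. The checkpoint name, input strings and max length here are assumptions for illustration only.

from datasets import Dataset

model_checkpoint = "distilbert-base-uncased"  # assumed checkpoint for this sketch
trainer, tokenizer = load_model()

texts = ["an example sentence", "another example sentence"]  # hypothetical inputs
ds = Dataset.from_dict({"text": texts})
ds = ds.map(lambda x: tokenizer(x["text"], truncation=True,
                                padding="max_length", max_length=32),
            batched=True, remove_columns=["text"])
# Logits come back as a numpy array; argmax gives the predicted class per input.
preds = trainer.predict(ds).predictions.argmax(axis=-1)
print(preds)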
Exemple #14
0
warmup_steps = 1000 // batch_size

# Load k-fold Data
labels, commits, _ = load_cross_validation_split(use_filtered)

# Cross Validation
acc_list, prec_list, recall_list = [], [], []
th_accs_list, th_precs_list, th_recalls_list = [], [], []
prob_dict = {}
for fold_idx in range(n_fold):
    print("[Fold {}]".format(fold_idx + 1), end=" ")
    prob_dict["fold_{}".format(fold_idx + 1)] = {}

    # Init Tokenizer & Model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=n_class)
    num_params = 0
    for param in model.parameters():
        num_params += param.numel()
    print("model size", num_params, end=" ")
    model.to(device)

    # Get Train/Eval Split
    label_eval, commit_eval = labels[fold_idx], commits[fold_idx]
    label_train, commit_train = [], []
    for idx in range(n_fold):
        if idx != fold_idx:
            label_train += labels[idx]
            commit_train += commits[idx]
    assert len(label_eval) == len(commit_eval)
    assert len(label_train) == len(commit_train)

def preprocess_function(examples):
    return tokenizer(examples[sentence1_key],
                     examples[sentence2_key],
                     truncation=True)


# %%
preprocess_function(dataset['train'][:5])
# %%
encoded_dataset = dataset.map(preprocess_function, batched=True)
# %%
# finetuning the model
from transformers import AutoModelForSequenceClassification, \
    TrainingArguments, Trainer

num_labels = 3 if task.startswith("mnli") else 1 if task == "stsb" else 2
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, num_labels=num_labels)

# %%
metric_name = "pearson" if task == "stsb" else "matthews_correlation" if task == "cola" else "accuracy"

args = TrainingArguments(
    "test-glue",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
)
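A hedged continuation of this notebook fragment, wiring the model, the TrainingArguments and the encoded dataset into a Trainer. The metric loading and validation-split selection below follow the usual GLUE fine-tuning notebook and are assumptions here, as is the `tokenizer` created earlier in the notebook.

# %%
import numpy as np
from datasets import load_metric

metric = load_metric("glue", "mnli" if task.startswith("mnli") else task)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = (np.squeeze(predictions) if task == "stsb"
                   else np.argmax(predictions, axis=1))
    return metric.compute(predictions=predictions, references=labels)

validation_key = ("validation_mismatched" if task == "mnli-mm"
                  else "validation_matched" if task == "mnli"
                  else "validation")

trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset[validation_key],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()
trainer.evaluate()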
Exemple #16
0
def evaluate_style_gen_title(
    existing_run_name: str,
    existing_run_id: str,
    config_file: str,
    gen_model_file: str,
    discr_model_file: str,
    test_file: str,
    test_sample_rate: float,
):
    logging.set_verbosity_info()
    init_wandb(existing_run_name, None, existing_run_id)

    config = json.loads(jsonnet_evaluate_file(config_file))

    tokenizer_model_path = config["tokenizer_model_path"]
    tokenizer = BertTokenizer.from_pretrained(tokenizer_model_path, do_lower_case=False, do_basic_tokenize=False)

    max_tokens_text = config["max_tokens_text"]
    max_tokens_title = config["max_tokens_title"]
    setattr(tokenizer, 'max_tokens_text', max_tokens_text)

    batch_size = config["batch_size"]

    print("Loading model...")
    model = EncoderDecoderModel.from_pretrained(gen_model_file)
    model.eval()
    model.cuda()

    agency_list = config['agency_list']
    discriminator = AutoModelForSequenceClassification.from_pretrained(discr_model_file, num_labels=len(agency_list)).cuda()
    
    print("Fetching TG data...")
    test_records = [r for r in tqdm.tqdm(tg_reader(test_file)) 
        if random.random() <= test_sample_rate]
    
    print("Building datasets...")
    
    
    agency_to_special_token_id = {
        a: tokenizer.vocab[f'[unused{i+1}]'] for i, a in enumerate(agency_list)
    }

    agency_to_target = {a: i for i, a in enumerate(sorted(agency_list))}

    test_dataset = AgencyTitleDatasetGeneration(
        test_records, tokenizer,
        filter_agencies=list(agency_to_special_token_id.keys()),
        agency_to_special_token_id=agency_to_special_token_id,
        max_tokens_text=max_tokens_text, max_tokens_title=max_tokens_title
    )

    print('Dataset size:', len(test_dataset))

    y_pred = []
    y_true = []

    for i in tqdm.trange(0, len(test_dataset), batch_size):
        data = test_dataset[i]
        for k in tuple(data.keys()):
            if k not in ('input_ids', 'attention_mask'):
                del data[k]
            else:
                data[k] = data[k].unsqueeze(0)

        for j in range(i + 1, min(i + batch_size, len(test_dataset))):
            for k in data.keys():
                data[k] = torch.cat((data[k], test_dataset[j][k].unsqueeze(0)), dim=0)

        y_true.extend([ agency_to_target[test_dataset.get_strings(j)['agency']]
            for j in range(i, min(i + batch_size, len(test_dataset)))])

        data['input_ids'] = data['input_ids'].cuda()
        data['attention_mask'] = data['attention_mask'].cuda()

        output_ids = model.generate(
            **data,
            decoder_start_token_id=model.config.decoder.pad_token_id,
            min_length=7,
            max_length=20,
            num_beams=6
        )

        preds = [
            tokenizer.decode(first_sent(x, tokenizer.sep_token_id), skip_special_tokens=True) for x in output_ids
        ]

        for title in preds:
            inp = tokenizer(title, 
                add_special_tokens=True, max_length=max_tokens_title,
                padding='max_length', truncation=True
            )

            logits = discriminator(input_ids=torch.LongTensor(inp['input_ids']).cuda().unsqueeze(0), 
                                   attention_mask=torch.LongTensor(inp['attention_mask']).cuda().unsqueeze(0))[0]
            y_pred.append(torch.argmax(logits).item())

    wandb.summary.update({
        'D-Style': classification_report(y_true, y_pred, output_dict=True)
    })
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(
        model_checkpoint, num_labels=num_labels)
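The model_init callable above is typically handed to a Trainer so that every trial of a hyperparameter search starts from freshly loaded pretrained weights. A minimal sketch, assuming train_ds, eval_ds and compute_metrics are defined elsewhere and that optuna or ray[tune] is installed:

from transformers import Trainer, TrainingArguments

trainer = Trainer(
    model_init=model_init,  # re-instantiates the model for every trial
    args=TrainingArguments("hp-search", evaluation_strategy="epoch"),
    train_dataset=train_ds,
    eval_dataset=eval_ds,
    compute_metrics=compute_metrics,
)
best_run = trainer.hyperparameter_search(direction="maximize", n_trials=5)
print(best_run.hyperparameters)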
Exemple #18
0
nlp = pipeline("sentiment-analysis")
result = nlp("I love you")[0]
print(f"label: {result['label']}, with score: {round(result['score'], 4)}")

## Sequence classification: are two sentences paraphrases of each other?
# 1. instantiate a tokenizer and a model from the checkpoint name
# 2. build a sequence from the two sentences
# 3. pass this sequence through the model (0: not paraphrase, 1: is a paraphrase)
# 4. compute the softmax and get probabilities over the classes
# 5. print the result

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-cased-finetuned-mrpc")

classes = ["not paraphrase", "is paraphrase"]

sequence_0 = "The company HuggingFace is based in New York City"
sequence_1 = "Apples are especially bad for your health"
sequence_2 = "HuggingFace headquarters are situated in Manhattan"  # "TypeError: Can't convert this to PyBool.....

paraphrase = tokenizer(sequence_0, sequence_1, sequence_2, return_tensors="pt")
not_paraphrase = tokenizer(sequence_0,
                           sequence_1,
                           sequence_2,
                           return_tensors="pt")

paraphrase_classification_logits = model(**paraphrase).logits
not_paraphrase_classification_logits = model(**not_paraphrase).logits
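Completing steps 4 and 5 from the comment above, a short sketch that applies softmax to the logits and prints the class probabilities for each pair:

paraphrase_results = torch.softmax(
    paraphrase_classification_logits, dim=1).detach().tolist()[0]
not_paraphrase_results = torch.softmax(
    not_paraphrase_classification_logits, dim=1).detach().tolist()[0]

for i, cls in enumerate(classes):
    print(f"paraphrase pair - {cls}: {round(paraphrase_results[i] * 100)}%")
    print(f"non-paraphrase pair - {cls}: {round(not_paraphrase_results[i] * 100)}%")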
Exemple #19
0
def main():
    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task.",
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help=
        "Path to pretrained model or model identifier from huggingface.co/models",
    )
    parser.add_argument(
        "--task_name",
        default=None,
        type=str,
        required=True,
        help="The name of the task to train selected in the list: " +
        ", ".join(glue_processors.keys()),
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model predictions and checkpoints will be written.",
    )

    # Other parameters
    parser.add_argument(
        "--config_name",
        default="",
        type=str,
        help=
        "Pretrained config name or path if not the same as model_name_or_path",
    )
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help=
        "Pretrained tokenizer name or path if not the same as model_name_or_path",
    )
    parser.add_argument(
        "--cache_dir",
        default=None,
        type=str,
        help=
        "Where do you want to store the pre-trained models downloaded from huggingface.co",
    )
    parser.add_argument(
        "--data_subset",
        type=int,
        default=-1,
        help="If > 0: limit the data to a subset of data_subset instances.")
    parser.add_argument("--overwrite_output_dir",
                        action="store_true",
                        help="Whether to overwrite data in output directory")
    parser.add_argument(
        "--overwrite_cache",
        action="store_true",
        help="Overwrite the cached training and evaluation sets")

    parser.add_argument("--dont_normalize_importance_by_layer",
                        action="store_true",
                        help="Don't normalize importance score by layers")
    parser.add_argument(
        "--dont_normalize_global_importance",
        action="store_true",
        help="Don't normalize all importance scores between 0 and 1",
    )

    parser.add_argument(
        "--try_masking",
        action="store_true",
        help="Whether to try to mask head until a threshold of accuracy.")
    parser.add_argument(
        "--masking_threshold",
        default=0.9,
        type=float,
        help=
        "masking threshold in term of metrics (stop masking when metric < threshold * original metric value).",
    )
    parser.add_argument(
        "--masking_amount",
        default=0.1,
        type=float,
        help="Amount to heads to masking at each masking step.")
    parser.add_argument("--metric_name",
                        default="acc",
                        type=str,
                        help="Metric to use for head masking.")

    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, sequences shorter padded.",
    )
    parser.add_argument("--batch_size",
                        default=1,
                        type=int,
                        help="Batch size.")

    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument("--no_cuda",
                        action="store_true",
                        help="Whether not to use CUDA when available")
    parser.add_argument("--server_ip",
                        type=str,
                        default="",
                        help="Can be used for distant debugging.")
    parser.add_argument("--server_port",
                        type=str,
                        default="",
                        help="Can be used for distant debugging.")
    args = parser.parse_args()

    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup devices and distributed training
    if args.local_rank == -1 or args.no_cuda:
        args.device = torch.device("cuda" if torch.cuda.is_available()
                                   and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        args.device = torch.device("cuda", args.local_rank)
        args.n_gpu = 1
        torch.distributed.init_process_group(
            backend="nccl")  # Initializes the distributed backend

    # Setup logging
    logging.basicConfig(
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
    logger.info("device: {} n_gpu: {}, distributed: {}".format(
        args.device, args.n_gpu, bool(args.local_rank != -1)))
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(args.local_rank):
        transformers.utils.logging.set_verbosity_info()
        transformers.utils.logging.enable_default_handler()
        transformers.utils.logging.enable_explicit_format()

    # Set seeds
    set_seed(args.seed)

    # Prepare GLUE task
    args.task_name = args.task_name.lower()
    if args.task_name not in glue_processors:
        raise ValueError("Task not found: %s" % (args.task_name))
    processor = glue_processors[args.task_name]()
    args.output_mode = glue_output_modes[args.task_name]
    label_list = processor.get_labels()
    num_labels = len(label_list)

    # Load pretrained model and tokenizer
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.

    config = AutoConfig.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=args.task_name,
        output_attentions=True,
        cache_dir=args.cache_dir,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        cache_dir=args.cache_dir,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
        cache_dir=args.cache_dir,
    )

    # Distributed and parallel training
    model.to(args.device)
    if args.local_rank != -1:
        model = nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)
    elif args.n_gpu > 1:
        model = nn.DataParallel(model)

    # Print/save training arguments
    os.makedirs(args.output_dir, exist_ok=True)
    torch.save(args, os.path.join(args.output_dir, "run_args.bin"))
    logger.info("Training/evaluation parameters %s", args)

    # Prepare dataset for the GLUE task
    eval_dataset = GlueDataset(args, tokenizer=tokenizer, mode="dev")
    if args.data_subset > 0:
        eval_dataset = Subset(
            eval_dataset, list(range(min(args.data_subset,
                                         len(eval_dataset)))))
    eval_sampler = SequentialSampler(
        eval_dataset) if args.local_rank == -1 else DistributedSampler(
            eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.batch_size,
                                 collate_fn=default_data_collator)

    # Compute head entropy and importance score
    compute_heads_importance(args, model, eval_dataloader)

    # Try head masking (set heads to zero until the score goes under a threshold)
    # and head pruning (remove masked heads and see the effect on the network)
    if args.try_masking and args.masking_threshold > 0.0 and args.masking_threshold < 1.0:
        head_mask = mask_heads(args, model, eval_dataloader)
        prune_heads(args, model, eval_dataloader, head_mask)
Exemple #20
0
        "dbpedia_14": {"keys": ("text", None), "num_classes": 14, "task_type": "topic"},
        "yahoo_answers_topics": {"keys": ("text", None), "num_classes": 10, "task_type": "topic"},
        "imdb": {"keys": ("text", None), "num_classes": 2, "task_type": "sentiment"},
        "amazon_polarity": {"keys": ("text", None), "num_classes": 2, "task_type": "sentiment"},
        "yelp_polarity": {"keys": ("text", None), "num_classes": 2, "task_type": "sentiment"}
    }
    sentence1_key, sentence2_key = task_to_keys[task]["keys"]
    num_classes = task_to_keys[task]["num_classes"]
    task_type = task_to_keys[task]["task_type"]
            
    #############################################################
    ## Model + Tokenizer ########################################
    #############################################################
    checkpoint = save_dir + MODEL_NAME + '-' + task + '-' + t + '-' + str(num_train_per_class)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=num_classes).to(device)

    #############################################################
    ## Dataset Preparation ######################################
    #############################################################

    if "Ada-" in t:
        train_data_path = os.path.join(data_dir, task, 'ORIG', task + '_train_' + str(num_train_per_class))
    else:
        train_data_path = os.path.join(data_dir, task, t, task + '_train_' + str(num_train_per_class))
    valid_data_path = os.path.join(data_dir, task, 'ORIG', task + '_valid_' + str(num_valid_per_class))

    train_dataset = load_from_disk(train_data_path).shuffle()
    eval_dataset  = load_from_disk(valid_data_path)
    test_dataset  = load_dataset(task, split='test')
    classifier = pipeline('sentiment-analysis')
    results = classifier(["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."])

    for result in results:
        print(f"label:{result['label']},with score:{round(result['score'], 4)}")

    '''
    Output:
    label:POSITIVE,with score:0.9998
    label:NEGATIVE,with score:0.5309
    '''


from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")

model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")

# classifier = pipeline('sentiment-analysis', model="nlptown/bert-base-multilingual-uncased-sentiment")
classifier = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
results = classifier(["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."])

for result in results:
    print(f"label:{result['label']},with score:{round(result['score'], 4)}")

'''
With the model specified explicitly,
the output is:

'''
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses(
        )

    if (os.path.exists(training_args.output_dir)
            and os.listdir(training_args.output_dir) and training_args.do_train
            and not training_args.overwrite_output_dir):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
        )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO
        if training_args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        training_args.local_rank,
        training_args.device,
        training_args.n_gpu,
        bool(training_args.local_rank != -1),
        training_args.fp16,
    )
    logger.info("Training/evaluation parameters %s", training_args)

    # Set seed
    set_seed(training_args.seed)

    try:
        num_labels = glue_tasks_num_labels[data_args.task_name]
        output_mode = glue_output_modes[data_args.task_name]
    except KeyError:
        raise ValueError("Task not found: %s" % (data_args.task_name))

    # Load pretrained model and tokenizer
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.

    config = AutoConfig.from_pretrained(
        model_args.config_name
        if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name
        if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
    )

    # Get datasets
    train_dataset = GlueDataset(
        data_args, tokenizer=tokenizer) if training_args.do_train else None
    eval_dataset = GlueDataset(data_args, tokenizer=tokenizer,
                               mode="dev") if training_args.do_eval else None
    test_dataset = GlueDataset(
        data_args, tokenizer=tokenizer,
        mode="test") if training_args.do_predict else None

    def compute_metrics(p: EvalPrediction) -> Dict:
        if output_mode == "classification":
            preds = np.argmax(p.predictions, axis=1)
        elif output_mode == "regression":
            preds = np.squeeze(p.predictions)
        return glue_compute_metrics(data_args.task_name, preds, p.label_ids)

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    # Training
    if training_args.do_train:
        trainer.train(model_path=model_args.model_name_or_path if os.path.
                      isdir(model_args.model_name_or_path) else None)
        trainer.save_model()
        # For convenience, we also re-save the tokenizer to the same directory,
        # so that you can share your model easily on huggingface.co/models =)
        if trainer.is_world_master():
            tokenizer.save_pretrained(training_args.output_dir)

    # Evaluation
    eval_results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        eval_datasets = [eval_dataset]
        if data_args.task_name == "mnli":
            mnli_mm_data_args = dataclasses.replace(data_args,
                                                    task_name="mnli-mm")
            eval_datasets.append(
                GlueDataset(mnli_mm_data_args, tokenizer=tokenizer,
                            mode="dev"))

        for eval_dataset in eval_datasets:
            eval_result = trainer.evaluate(eval_dataset=eval_dataset)

            output_eval_file = os.path.join(
                training_args.output_dir,
                f"eval_results_{eval_dataset.args.task_name}.txt")
            if trainer.is_world_master():
                with open(output_eval_file, "w") as writer:
                    logger.info("***** Eval results {} *****".format(
                        eval_dataset.args.task_name))
                    for key, value in eval_result.items():
                        logger.info("  %s = %s", key, value)
                        writer.write("%s = %s\n" % (key, value))

            eval_results.update(eval_result)

    if training_args.do_predict:
        logging.info("*** Test ***")
        test_datasets = [test_dataset]
        if data_args.task_name == "mnli":
            mnli_mm_data_args = dataclasses.replace(data_args,
                                                    task_name="mnli-mm")
            test_datasets.append(
                GlueDataset(mnli_mm_data_args,
                            tokenizer=tokenizer,
                            mode="test"))

        for test_dataset in test_datasets:
            predictions = trainer.predict(
                test_dataset=test_dataset).predictions
            if output_mode == "classification":
                predictions = np.argmax(predictions, axis=1)

            output_test_file = os.path.join(
                training_args.output_dir,
                f"test_results_{test_dataset.args.task_name}.txt")
            if trainer.is_world_master():
                with open(output_test_file, "w") as writer:
                    logger.info("***** Test results {} *****".format(
                        test_dataset.args.task_name))
                    writer.write("index\tprediction\n")
                    for index, item in enumerate(predictions):
                        if output_mode == "regression":
                            writer.write("%d\t%3.3f\n" % (index, item))
                        else:
                            item = test_dataset.get_labels()[item]
                            writer.write("%d\t%s\n" % (index, item))
    return eval_results
Exemple #23
0
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1]))
    # elif len(sys.argv)==1: # parse from local dict
    #     model_args, data_args, training_args = parser.parse_dict(args_dict)
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses(
        )

    # easy way to modify args, could insert for loop here to do hyperparam search (trainer does it too)
    # training_args.model_name_or_path = "bert-base-uncased"

    if (os.path.exists(training_args.output_dir)
            and os.listdir(training_args.output_dir) and training_args.do_train
            and not training_args.overwrite_output_dir):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
        )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO
        if training_args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        training_args.local_rank,
        training_args.device,
        training_args.n_gpu,
        bool(training_args.local_rank != -1),
        training_args.fp16,
    )
    logger.info("Training/evaluation parameters %s", training_args)

    # Set seed
    set_seed(training_args.seed)

    try:
        num_labels = glue_tasks_num_labels[data_args.task_name]
        output_mode = glue_output_modes[data_args.task_name]
    except KeyError:
        raise ValueError("Task not found: %s" % (data_args.task_name))

    # Load pretrained model and tokenizer
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.

    config = AutoConfig.from_pretrained(
        model_args.config_name
        if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name
        if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
    )

    from transformers import AutoModel
    print('\nsequence classification model params')
    # print(model.named_parameters())
    for name, param in model.named_parameters():
        if "classifier" in name: print(name)
    model_testing = AutoModel.from_pretrained("google/mobilebert-uncased")
    print('\nmlm model params')
    for name, param in model_testing.named_parameters():
        if "classifier" in name: print(name)

    # Get datasets
    train_dataset = (GlueDataset(
        data_args, tokenizer=tokenizer, cache_dir=model_args.cache_dir)
                     if training_args.do_train else None)
    eval_dataset = (GlueDataset(data_args,
                                tokenizer=tokenizer,
                                mode="dev",
                                cache_dir=model_args.cache_dir)
                    if training_args.do_eval else None)
    test_dataset = (GlueDataset(data_args,
                                tokenizer=tokenizer,
                                mode="test",
                                cache_dir=model_args.cache_dir)
                    if training_args.do_predict else None)

    def build_compute_metrics_fn(
            task_name: str) -> Callable[[EvalPrediction], Dict]:
        def compute_metrics_fn(p: EvalPrediction):
            preds = p.predictions[0] if isinstance(p.predictions,
                                                   tuple) else p.predictions
            if output_mode == "classification":
                preds = np.argmax(preds, axis=1)
            else:  # regression
                preds = np.squeeze(preds)
            return glue_compute_metrics(task_name, preds, p.label_ids)

        return compute_metrics_fn

    # logdir = training_args.output_dir +'/'+ datetime.now().strftime("%Y%m%d-%H%M%S")
    # writer = SummaryWriter(log_dir=logdir)
    # Initialize our Trainer
    trainer = Trainer(model=model,
                      args=training_args,
                      train_dataset=train_dataset,
                      eval_dataset=eval_dataset,
                      compute_metrics=build_compute_metrics_fn(
                          data_args.task_name)
                      # tb_writer=writer
                      )

    # Training
    if training_args.do_train:
        trainer.train(model_path=model_args.model_name_or_path if os.path.
                      isdir(model_args.model_name_or_path) else None)
        trainer.save_model()
        # For convenience, we also re-save the tokenizer to the same directory,
        # so that you can share your model easily on huggingface.co/models =)
        if trainer.is_world_master():
            tokenizer.save_pretrained(training_args.output_dir)

    # Evaluation
    eval_results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        eval_datasets = [eval_dataset]
        if data_args.task_name == "mnli":
            mnli_mm_data_args = dataclasses.replace(data_args,
                                                    task_name="mnli-mm")
            eval_datasets.append(
                GlueDataset(mnli_mm_data_args,
                            tokenizer=tokenizer,
                            mode="dev",
                            cache_dir=model_args.cache_dir))

        for eval_dataset in eval_datasets:
            trainer.compute_metrics = build_compute_metrics_fn(
                eval_dataset.args.task_name)
            then = time.time()
            eval_result = trainer.evaluate(eval_dataset=eval_dataset)
            elapsed = time.time() - then
            print("Eval took {} seconds".format(elapsed))
            print("throughput: {} inf/sec".format(len(eval_dataset) / elapsed))
            output_eval_file = os.path.join(
                training_args.output_dir,
                f"eval_results_{eval_dataset.args.task_name}.txt")
            if trainer.is_world_master():
                with open(output_eval_file, "w") as writer:
                    logger.info("***** Eval results {} *****".format(
                        eval_dataset.args.task_name))
                    for key, value in eval_result.items():
                        logger.info("  %s = %s", key, value)
                        writer.write("%s = %s\n" % (key, value))

            eval_results.update(eval_result)

    if training_args.do_predict:
        logging.info("*** Test ***")
        test_datasets = [test_dataset]
        if data_args.task_name == "mnli":
            mnli_mm_data_args = dataclasses.replace(data_args,
                                                    task_name="mnli-mm")
            test_datasets.append(
                GlueDataset(mnli_mm_data_args,
                            tokenizer=tokenizer,
                            mode="test",
                            cache_dir=model_args.cache_dir))

        for test_dataset in test_datasets:
            predictions = trainer.predict(
                test_dataset=test_dataset).predictions
            if output_mode == "classification":
                predictions = np.argmax(predictions, axis=1)

            output_test_file = os.path.join(
                training_args.output_dir,
                f"test_results_{test_dataset.args.task_name}.txt")
            if trainer.is_world_master():
                with open(output_test_file, "w") as writer:
                    logger.info("***** Test results {} *****".format(
                        test_dataset.args.task_name))
                    writer.write("index\tprediction\n")
                    for index, item in enumerate(predictions):
                        if output_mode == "regression":
                            writer.write("%d\t%3.3f\n" % (index, item))
                        else:
                            item = test_dataset.get_labels()[item]
                            writer.write("%d\t%s\n" % (index, item))
    return eval_results
Exemple #24
0
def test(sarc_percentage):
    # BERT-base and RoBERTa-base models fine-tuned on sentiment analysis datasets (SST-2 and IMDB).
    models = [
        'textattack/bert-base-uncased-SST-2',
        'textattack/roberta-base-SST-2',
        'textattack/bert-base-uncased-imdb',
        'textattack/roberta-base-imdb',
    ]

    # Datasets - GEN, HYP, RQ, SemEval (sarcasm only) separately.
    gen_sarc = pd.read_csv('/scratch/ec2684/GEN-sarc-notsarc.csv')
    hyp_sarc = pd.read_csv('/scratch/ec2684/HYP-sarc-notsarc.csv')
    rg_sarc = pd.read_csv('/scratch/ec2684/RQ-sarc-notsarc.csv')
    sem_eval = pd.read_csv('/scratch/ec2684/SemEval2018-T3-train-taskA.csv')

    # A csv file that contains the model prediction results of testing the above four sarcastic models.
    sarc_model_pred_report = pd.read_csv('/scratch/ec2684/report.csv')

    ## Generating a dataset with only sarcastic examples from the above four datasets.
    gen_sarc_data = evaluate.extrac_sarc_only(gen_sarc, False)
    hyp_sarc_data = evaluate.extrac_sarc_only(hyp_sarc, False)
    rg_sarc_data = evaluate.extrac_sarc_only(rg_sarc, False)
    sem_sarc_data = evaluate.extrac_sarc_only(sem_eval, True)

    dataset_name = [
        'GEN-sarc-notsarc.csv', 'HYP-sarc-notsarc.csv', 'RQ-sarc-notsarc.csv',
        'SemEval2018-T3-train-taskA.csv'
    ]
    sarc_datasets = [gen_sarc_data, hyp_sarc_data, rg_sarc_data, sem_sarc_data]
    sarc_labels = []

    # Labels for the sarcastic examples only datasets.
    for dataset in sarc_datasets:
        sarc_labels.append(np.zeros(len(dataset), dtype='int'))

    ## IMDB dataset instantiation.
    imdb_test = load_dataset('imdb', split='test')
    imdb_test_positive = imdb_test.filter(
        lambda example: example['label'] == 1)
    imdb_test_negative = imdb_test.filter(
        lambda example: example['label'] == 0)
    imdb_test_positive = imdb_test_positive.sort(column='text')
    imdb_test_negative = imdb_test_negative.sort(column='text')
    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
    imdb_test_positive = imdb_test_positive.filter(lambda example: len(
        tokenizer(example['text'])['attention_mask']) < 512)
    imdb_test_negative = imdb_test_negative.filter(lambda example: len(
        tokenizer(example['text'])['attention_mask']) < 512)

    # Extracting the samples from the sarcastic dataset that the models have predicted as being negative.
    # Dictionary of such data
    neg_pred_sarc_dataset = {}

    for i in range(6):
        k = len(dataset_name) * i
        dict_temp = {}
        for j in range(len(dataset_name)):
            labels = sarc_model_pred_report['Correct index'][k + j]
            neg_pred_index = list(map(int, labels[1:-1].split(',')))
            truncated = []
            for index in neg_pred_index:
                truncated.append(sarc_datasets[j][index])
            dict_temp[sarc_model_pred_report['Dataset'][k + j]] = truncated

        neg_pred_sarc_dataset[sarc_model_pred_report['Model'][k]] = dict_temp

    comprehensive_results = []

    index = 1
    clean_comprehensive_results = []
    perturbation_first_results = []
    perturbation_last_results = []

    var = 50
    imdb_token_length = 400
    pert_token_length = 100
    imdb_test_positive = imdb_test_positive.filter(
        lambda example: len(tokenizer(example['text'])['attention_mask']
                            ) < imdb_token_length + var)
    imdb_test_positive = imdb_test_positive.filter(
        lambda example: imdb_token_length - var < len(
            tokenizer(example['text'])['attention_mask']))
    imdb_test_negative = imdb_test_negative.filter(
        lambda example: len(tokenizer(example['text'])['attention_mask']
                            ) < imdb_token_length + var)
    imdb_test_negative = imdb_test_negative.filter(
        lambda example: imdb_token_length - var < len(
            tokenizer(example['text'])['attention_mask']))

    print(f'{sarc_percentage} keeping {imdb_token_length}/{pert_token_length}')
    # Iterating through the models and evaluating the results.
    for model in models:
        # Initializing the model.
        tokenizer = AutoTokenizer.from_pretrained(model)
        model_init = AutoModelForSequenceClassification.from_pretrained(model)
        nlp_pipeline = pipeline("sentiment-analysis",
                                model=model_init,
                                tokenizer=tokenizer,
                                framework="pt",
                                device=0)

        print(f'\n\n({index}) Report for {model}.')
        subindex = 0
        for dataset in sarc_datasets:
            print(
                f'''\n({index}-{subindex+1}) Testing on {dataset_name[subindex]} dataset on only sarcastic data.\n'''
            )
            passage_raw = neg_pred_sarc_dataset[model][dataset_name[subindex]]
            output = evaluate.length_threshold(passage_raw,
                                               pert_token_length - var,
                                               pert_token_length + var,
                                               tokenizer)
            print(
                f'''\n{np.around(output['proportion'],4)*100}% of {dataset_name[subindex]} sarcastic data passed threshold test.\n'''
            )

            # Shuffling the order of random (sarcastic) perturbations.
            passage = output['dataset']
            random.shuffle(passage)
            imdb_length = len(imdb_test_positive['text'])

            print(
                f'\nTesting IMDB positive dataset:\n IMDB len: {imdb_length}\n Passage len: {len(passage)}'
            )
            n = imdb_length
            imdb_passage = imdb_test_positive['text']
            label = np.ones(n, dtype=int)

            print(f'\nWithout perturbations:')
            predictions, prediction_scores = evaluate.evaluate(
                imdb_passage, nlp_pipeline)
            binary_predictions_no_perturb, binary_labels, binary_prediction_scores, binary_original_index = evaluate.report_binary_metrics(
                predictions, prediction_scores, label)
            acc = evaluate.report_acc(binary_labels,
                                      binary_predictions_no_perturb)
            clean_summary = evaluate.summary(label, predictions,
                                             prediction_scores, model,
                                             dataset_name[subindex], acc)

            print(f'\nWith perturbations: perturbation + IMDB order.')
            perturbed_passage, ratio = evaluate.merge(imdb_passage, passage,
                                                      False)
            predictions, prediction_scores = evaluate.evaluate(
                perturbed_passage, nlp_pipeline)
            binary_predictions_perturb, binary_labels, binary_prediction_scores, binary_original_index = evaluate.report_binary_metrics(
                predictions, prediction_scores, label)
            acc = evaluate.report_acc(binary_labels,
                                      binary_predictions_perturb)
            no_change = np.count_nonzero(
                np.asarray(binary_predictions_no_perturb) == np.asarray(
                    binary_predictions_perturb))
            differences = len(binary_predictions_no_perturb) - no_change
            print(
                f'''\nFrom {len(binary_predictions_no_perturb)} predictions, perturbation + IMDB changed:\n {differences} labels\n {no_change} labels remained the same'''
            )
            perturb_first_summary = evaluate.perturb_summary(
                label, predictions, prediction_scores, model,
                dataset_name[subindex], acc, ratio, differences, no_change)

            print(f'\nWith perturbations: IMDB + perturbation order.')
            perturbed_passage, ratio = evaluate.merge(imdb_passage, passage,
                                                      True)
            predictions, prediction_scores = evaluate.evaluate(
                perturbed_passage, nlp_pipeline)
            binary_predictions_perturb, binary_labels, binary_prediction_scores, binary_original_index = evaluate.report_binary_metrics(
                predictions, prediction_scores, label)
            acc = evaluate.report_acc(binary_labels,
                                      binary_predictions_perturb)
            no_change = np.count_nonzero(
                np.asarray(binary_predictions_no_perturb) == np.asarray(
                    binary_predictions_perturb))
            differences = len(binary_predictions_no_perturb) - no_change
            print(
                f'''\nFrom {len(binary_predictions_no_perturb)} predictions, IMDB + perturbation changed:\n {differences} labels\n {no_change} labels remained the same'''
            )
            perturb_last_summary = evaluate.perturb_summary(
                label, predictions, prediction_scores, model,
                dataset_name[subindex], acc, ratio, differences, no_change)

            clean_comprehensive_results.append(clean_summary)
            perturbation_first_results.append(perturb_first_summary)
            perturbation_last_results.append(perturb_last_summary)
            subindex = subindex + 1
        index = index + 1
    return clean_comprehensive_results, perturbation_first_results, perturbation_last_results
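
# NOTE: `evaluate.merge` above comes from the project's own evaluation module.
# The sketch below only illustrates the perturbation-first vs. perturbation-last
# idea used in the loop; the name, the `append` flag and the returned length
# ratio are assumptions, not the actual implementation.
import numpy as np

def merge_sketch(passages, perturbations, append):
    """Attach a sarcastic perturbation to every passage.

    append=False -> perturbation + IMDB order, append=True -> IMDB + perturbation.
    Returns the merged passages and the mean perturbation/passage length ratio.
    """
    merged, ratios = [], []
    for i, passage in enumerate(passages):
        perturbation = perturbations[i % len(perturbations)]
        merged.append(passage + ' ' + perturbation if append
                      else perturbation + ' ' + passage)
        ratios.append(len(perturbation.split()) / len(passage.split()))
    return merged, float(np.mean(ratios))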
Exemple #25
0
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help="The input data dir. Should contain the .tsv files (or other data files) for the task.",
    )
    parser.add_argument(
        "--model_type",
        default=None,
        type=str,
        required=True,
        help="Model type selected in the list: " + ", ".join(MODEL_TYPES),
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS),
    )
    parser.add_argument(
        "--task_name",
        default=None,
        type=str,
        required=True,
        help="The name of the task to train selected in the list: " + ", ".join(processors.keys()),
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help="The output directory where the model predictions and checkpoints will be written.",
    )

    # Other parameters
    parser.add_argument(
        "--config_name", default="", type=str, help="Pretrained config name or path if not the same as model_name",
    )
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="Pretrained tokenizer name or path if not the same as model_name",
    )
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help="Where do you want to store the pre-trained models downloaded from s3",
    )
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help="The maximum total input sequence length after tokenization. Sequences longer "
        "than this will be truncated, sequences shorter will be padded.",
    )
    parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
    parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--evaluate_during_training", action="store_true", help="Run evaluation during training at each logging step.",
    )
    parser.add_argument(
        "--do_lower_case", action="store_true", help="Set this flag if you are using an uncased model.",
    )

    parser.add_argument(
        "--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.",
    )
    parser.add_argument(
        "--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for evaluation.",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
    parser.add_argument(
        "--num_train_epochs", default=3.0, type=float, help="Total number of training epochs to perform.",
    )
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help="If > 0: set total number of training steps to perform. Override num_train_epochs.",
    )
    parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")

    parser.add_argument("--logging_steps", type=int, default=500, help="Log every X updates steps.")
    parser.add_argument("--save_steps", type=int, default=500, help="Save checkpoint every X updates steps.")
    parser.add_argument(
        "--eval_all_checkpoints",
        action="store_true",
        help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number",
    )
    parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")
    parser.add_argument(
        "--overwrite_output_dir", action="store_true", help="Overwrite the content of the output directory",
    )
    parser.add_argument(
        "--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets",
    )
    parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")

    parser.add_argument(
        "--fp16",
        action="store_true",
        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
    )
    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
    parser.add_argument("--server_ip", type=str, default="", help="For distant debugging.")
    parser.add_argument("--server_port", type=str, default="", help="For distant debugging.")
    args = parser.parse_args()

    if (
        os.path.exists(args.output_dir)
        and os.listdir(args.output_dir)
        and args.do_train
        and not args.overwrite_output_dir
    ):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
                args.output_dir
            )
        )

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Prepare GLUE task
    args.task_name = args.task_name.lower()
    if args.task_name not in processors:
        raise ValueError("Task not found: %s" % (args.task_name))
    processor = processors[args.task_name]()
    args.output_mode = output_modes[args.task_name]
    label_list = processor.get_labels()
    num_labels = len(label_list)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    args.model_type = args.model_type.lower()
    config = AutoConfig.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=args.task_name,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )

    if args.local_rank == 0:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args, args.task_name, tokenizer, evaluate=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
    if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = (
            model.module if hasattr(model, "module") else model
        )  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

        # Load a trained model and vocabulary that you have fine-tuned
        model = AutoModelForSequenceClassification.from_pretrained(args.output_dir)
        tokenizer = AutoTokenizer.from_pretrained(args.output_dir)
        model.to(args.device)

    # Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
            )
            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split("/")[-1] if checkpoint.find("checkpoint") != -1 else ""

            model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args, model, tokenizer, prefix=prefix)
            result = dict((k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

    return results
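
# A hypothetical command line for the script above (the file name run_glue.py
# is assumed); the flag names correspond to the argparse definitions in main():
#
#   python run_glue.py \
#       --data_dir ./glue_data/MRPC \
#       --model_type bert \
#       --model_name_or_path bert-base-uncased \
#       --task_name mrpc \
#       --do_train --do_eval --do_lower_case \
#       --max_seq_length 128 \
#       --per_gpu_train_batch_size 32 \
#       --learning_rate 2e-5 \
#       --num_train_epochs 3.0 \
#       --output_dir ./output/mrpc
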
def tune_transformer(num_samples=8,
                     gpus_per_trial=0,
                     smoke_test=False,
                     ray_address=None):
    ray.init(ray_address, log_to_driver=True)
    data_dir_name = "./data" if not smoke_test else "./test_data"
    data_dir = os.path.abspath(os.path.join(os.getcwd(), data_dir_name))
    if not os.path.exists(data_dir):
        os.mkdir(data_dir, 0o755)

    # Change these as needed.
    model_name = "bert-base-uncased" if not smoke_test \
        else "sshleifer/tiny-distilroberta-base"
    task_name = "rte"

    task_data_dir = os.path.join(data_dir, task_name.upper())

    num_labels = glue_tasks_num_labels[task_name]

    config = AutoConfig.from_pretrained(model_name,
                                        num_labels=num_labels,
                                        finetuning_task=task_name)

    # Download and cache tokenizer, model, and features
    print("Downloading and caching Tokenizer")
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Triggers tokenizer download to cache
    print("Downloading and caching pre-trained model")
    AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=config,
    )

    def get_model():
        return AutoModelForSequenceClassification.from_pretrained(
            model_name,
            config=config,
        )

    # Download data.
    download_data(task_name, data_dir)

    data_args = GlueDataTrainingArguments(task_name=task_name,
                                          data_dir=task_data_dir)

    train_dataset = GlueDataset(data_args,
                                tokenizer=tokenizer,
                                mode="train",
                                cache_dir=task_data_dir)
    eval_dataset = GlueDataset(data_args,
                               tokenizer=tokenizer,
                               mode="dev",
                               cache_dir=task_data_dir)

    training_args = TrainingArguments(
        output_dir=".",
        learning_rate=1e-5,  # config
        do_train=True,
        do_eval=True,
        evaluate_during_training=True,
        eval_steps=(len(train_dataset) // 16) +
        1 if not smoke_test else 1,  # config
        save_steps=(len(train_dataset) // 16) +
        1 if not smoke_test else 1,  # config,
        num_train_epochs=2,  # config
        max_steps=-1,
        per_device_train_batch_size=16,  # config
        per_device_eval_batch_size=16,  # config
        warmup_steps=0,
        weight_decay=0.1,  # config
        logging_dir="./logs",
    )

    trainer = Trainer(model_init=get_model,
                      args=training_args,
                      train_dataset=train_dataset,
                      eval_dataset=eval_dataset,
                      compute_metrics=build_compute_metrics_fn(task_name))

    tune_config = {
        "per_device_eval_batch_size":
        32,
        "eval_steps":
        tune.sample_from(lambda spec: len(train_dataset) // spec.config[
            "per_device_train_batch_size"] + 1  # noqa: E501
                         ) if not smoke_test else 1,
        "save_steps":
        tune.sample_from(lambda spec: spec.config["eval_steps"]),
        "num_train_epochs":
        tune.choice([2, 3, 4, 5]),
        "max_steps":
        1 if smoke_test else -1,  # Used for smoke test.
    }

    scheduler = PopulationBasedTraining(time_attr="training_iteration",
                                        metric="eval_acc",
                                        mode="max",
                                        perturbation_interval=1,
                                        hyperparam_mutations={
                                            "weight_decay":
                                            tune.uniform(0.0, 0.3),
                                            "learning_rate":
                                            tune.uniform(1e-5, 5e-5),
                                            "per_device_train_batch_size":
                                            [16, 32, 64],
                                        })

    reporter = CLIReporter(parameter_columns={
        "weight_decay": "w_decay",
        "learning_rate": "lr",
        "per_device_train_batch_size": "train_bs/gpu",
        "num_epochs": "num_epochs"
    },
                           metric_columns=[
                               "eval_acc", "eval_loss", "epoch",
                               "training_iteration"
                           ])

    trainer.hyperparameter_search(
        hp_space=lambda _: tune_config,
        backend="ray",
        n_trials=num_samples,
        resources_per_trial={
            "cpu": 1,
            "gpu": gpus_per_trial
        },
        scheduler=scheduler,
        keep_checkpoints_num=3,
        checkpoint_score_attr="training_iteration",
        stop={"training_iteration": 1} if smoke_test else None,
        progress_reporter=reporter,
        local_dir="~/ray_results/",
        name="tune_transformer_pbt",
        log_to_file=True)
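
# Example invocation of tune_transformer above; the argument values are
# illustrative (use smoke_test=True for a quick CPU-only sanity check).
if __name__ == "__main__":
    tune_transformer(num_samples=8, gpus_per_trial=1, smoke_test=False)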
Exemple #27
0
def preprocess(text):
    new_text = []
    # Mask user handles and URLs, token by token.
    for t in text.split(" "):
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)


# The models are fairly large (around 500 MB).
#
# Once downloaded, it is important to reuse the files stored on disk.

# In[162]:

task = 'sentiment'
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

model = AutoModelForSequenceClassification.from_pretrained(
    '/mnt/pretrained_models/' + MODEL)
tokenizer = AutoTokenizer.from_pretrained('/mnt/pretrained_models/' + MODEL)
config = AutoConfig.from_pretrained('/mnt/pretrained_models/' + MODEL)

# In[163]:

# download label mapping
labels = []
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urllib.request.urlopen(mapping_link) as f:
    html = f.read().decode('utf-8').split("\n")
    csvreader = csv.reader(html, delimiter='\t')
labels = [row[1] for row in csvreader if len(row) > 1]
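
# NOTE: an assumed follow-up cell (not in the original notebook) showing how
# the pieces above fit together: preprocess a tweet, tokenize it, run the
# model and map the highest-scoring class to one of the downloaded labels.
from scipy.special import softmax

text = preprocess("@user the new update is sooo great, it only crashed twice today")
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = softmax(output[0][0].detach().numpy())
print(labels[scores.argmax()], scores.max())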

Exemple #29
0
    def __init__(self, device, args):
        """
        Initializes a MAML few shot learning system
        :param device: The device to use to use the model on.
        :param args: A namedtuple of arguments specifying various hyperparameters.
        """
        super(MAMLFewShotClassifier, self).__init__(device, args)

        config = AutoConfig.from_pretrained(args.pretrained_weights)
        config.num_labels = args.num_classes_per_set
        model_initialization = AutoModelForSequenceClassification.from_pretrained(
            args.pretrained_weights, config=config
        )

        slow_model = MetaBERT

        # Init fast model
        state_dict = model_initialization.state_dict()
        config = model_initialization.config

        del model_initialization

        # Slow model
        self.classifier = slow_model.init_from_pretrained(
            state_dict,
            config,
            num_labels=args.num_classes_per_set,
            is_distil=self.is_distil,
            is_xlm=self.is_xlm,
            per_step_layer_norm_weights=args.per_step_layer_norm_weights,
            num_inner_loop_steps=args.number_of_training_steps_per_iter,
            device=device,
        )
        self.classifier.to("cpu")
        self.classifier.train()

        self.inner_loop_optimizer = LSLRGradientDescentLearningRule(
            device=torch.device("cpu"),
            init_learning_rate=self.task_learning_rate,
            total_num_inner_loop_steps=self.args.number_of_training_steps_per_iter,
            use_learnable_learning_rates=self.args.learnable_per_layer_per_step_inner_loop_learning_rate,
            init_class_head_lr_multiplier=self.args.init_class_head_lr_multiplier,
        )

        self.inner_loop_optimizer.initialise(
            names_weights_dict=self.get_inner_loop_parameter_dict(
                params=self.classifier.named_parameters()
            )
        )

        print("Inner Loop parameters")
        for key, value in self.inner_loop_optimizer.named_parameters():
            print(key, value.shape)

        print("Outer Loop parameters")
        for name, param in self.named_parameters():
            if param.requires_grad:
                print(name, param.shape, param.device, param.requires_grad)

        self.optimizer = Ranger(
            [
                {"params": self.classifier.parameters(), "lr": args.meta_learning_rate},
                {
                    "params": self.inner_loop_optimizer.parameters(),
                    "lr": args.meta_inner_optimizer_learning_rate,
                },
            ],
            lr=args.meta_learning_rate,
        )
        self.scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer=self.optimizer,
            T_max=self.args.total_epochs * self.args.total_iter_per_epoch,
            eta_min=self.args.min_learning_rate,
        )

        self.inner_loop_optimizer.to(self.device)

        self.clip_value = 1.0
        # Gradient clipping: clamp each parameter's gradient element-wise at
        # backward time via a per-parameter backward hook.
        for p in self.classifier.parameters():
            if p.requires_grad:
                p.register_hook(
                    lambda grad: torch.clamp(grad, -self.clip_value, self.clip_value)
                )

        self.num_freeze_epochs = args.num_freeze_epochs
        if self.num_freeze_epochs > 0:
            self.classifier.freeze()
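
# Standalone minimal sketch of the per-parameter gradient clipping used in the
# constructor above: Tensor.register_hook() lets us clamp each gradient
# element-wise at backward time, independently of the optimizer. The toy model
# and values are illustrative only.
import torch
import torch.nn as nn

clip_value = 1.0
toy_model = nn.Linear(4, 2)
for p in toy_model.parameters():
    if p.requires_grad:
        p.register_hook(lambda grad: torch.clamp(grad, -clip_value, clip_value))

loss = 100 * toy_model(torch.randn(8, 4)).sum()  # large loss to force big gradients
loss.backward()
print([p.grad.abs().max().item() for p in toy_model.parameters()])  # all <= 1.0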
Exemple #30
0
def main(args):
    set_seed(args.seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    config = AutoConfig.from_pretrained(args.model_name, num_labels=args.num_labels)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
    model = AutoModelForSequenceClassification.from_pretrained(args.model_name, config=config)
    model.to(device)

    collator = utils.Collator(pad_token_id=tokenizer.pad_token_id)
    train_dataset, label_map = utils.load_classification_dataset(
        args.train,
        tokenizer,
        args.field_a,
        args.field_b,
        args.label_field,
        limit=args.limit
    )
    train_loader = DataLoader(train_dataset, batch_size=args.bsz, shuffle=True, collate_fn=collator)
    dev_dataset, _ = utils.load_classification_dataset(
        args.dev,
        tokenizer,
        args.field_a,
        args.field_b,
        args.label_field,
        label_map
    )
    dev_loader = DataLoader(dev_dataset, batch_size=args.bsz, shuffle=False, collate_fn=collator)
    test_dataset, _ = utils.load_classification_dataset(
        args.test,
        tokenizer,
        args.field_a,
        args.field_b,
        args.label_field,
        label_map
    )
    test_loader = DataLoader(test_dataset, batch_size=args.bsz, shuffle=False, collate_fn=collator)

    if args.bias_correction:
        betas = (0.9, 0.999)
    else:
        betas = (0.0, 0.000)

    optimizer = AdamW(
        model.parameters(),
        lr=args.lr,
        weight_decay=1e-2,
        betas=betas
    )

    # Use the suggested learning rate scheduler: linear decay after a warmup
    # over the first 10% of training steps.
    num_training_steps = len(train_dataset) * args.epochs // args.bsz
    num_warmup_steps = num_training_steps // 10
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps,
                                                num_training_steps)

    if not args.ckpt_dir.exists():
        logger.info(f'Making checkpoint directory: {args.ckpt_dir}')
        args.ckpt_dir.mkdir(parents=True)
    elif not args.force_overwrite:
        raise RuntimeError('Checkpoint directory already exists.')

    try:
        best_accuracy = 0
        for epoch in range(args.epochs):
            logger.info('Training...')
            model.train()
            avg_loss = utils.ExponentialMovingAverage()
            pbar = tqdm(train_loader)
            for model_inputs, labels in pbar:
                model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
                labels = labels.to(device)
                optimizer.zero_grad()
                logits, *_ = model(**model_inputs)
                loss = F.cross_entropy(logits, labels.squeeze(-1))
                loss.backward()
                optimizer.step()
                scheduler.step()
                avg_loss.update(loss.item())
                pbar.set_description(f'loss: {avg_loss.get_metric(): 0.4f}, '
                                     f'lr: {optimizer.param_groups[0]["lr"]: .3e}')

            logger.info('Evaluating...')
            model.eval()
            correct = 0
            total = 0
            with torch.no_grad():
                for model_inputs, labels in dev_loader:
                    model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
                    labels = labels.to(device)
                    logits, *_ = model(**model_inputs)
                    _, preds = logits.max(dim=-1)
                    correct += (preds == labels.squeeze(-1)).sum().item()
                    total += labels.size(0)
                accuracy = correct / (total + 1e-13)
            logger.info(f'Accuracy: {accuracy : 0.4f}')

            if accuracy > best_accuracy:
                logger.info('Best performance so far.')
                model.save_pretrained(args.ckpt_dir)
                tokenizer.save_pretrained(args.ckpt_dir)
                best_accuracy = accuracy
    except KeyboardInterrupt:
        logger.info('Interrupted...')

    logger.info('Testing...')
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for model_inputs, labels in test_loader:
            model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
            labels = labels.to(device)
            logits, *_ = model(**model_inputs)
            _, preds = logits.max(dim=-1)
            correct += (preds == labels.squeeze(-1)).sum().item()
            total += labels.size(0)
        accuracy = correct / (total + 1e-13)
    logger.info(f'Accuracy: {accuracy : 0.4f}')
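
# NOTE: `utils.ExponentialMovingAverage` above belongs to the project's own
# utilities. A minimal sketch of such a running-loss tracker, matching the
# update()/get_metric() interface used in the training loop (the decay value
# is an assumption), could look like this.
class ExponentialMovingAverageSketch:
    def __init__(self, decay=0.99):
        self.decay = decay
        self._value = None

    def update(self, value):
        # Blend the newest observation into the running average.
        if self._value is None:
            self._value = value
        else:
            self._value = self.decay * self._value + (1 - self.decay) * value

    def get_metric(self):
        return 0.0 if self._value is None else self._value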