def import_downstream_models():
    ####################### loads a SQUAD finetuned model
    # saves it as a FARM adaptive model
    device, n_gpu = initialize_device_settings(use_cuda=True)
    model = "bert-large-uncased-whole-word-masking-finetuned-squad"
    save_dir = "saved_models/FARM-bert-large-uncased-whole-word-masking-finetuned-squad"
    lm = Bert.load(model)
    ph = QuestionAnsweringHead.load(model)
    am = AdaptiveModel(language_model=lm,
                       prediction_heads=[ph],
                       embeds_dropout_prob=0.1,
                       lm_output_types="per_token",
                       device=device)
    am.save(save_dir)
    # saves the processor associated with it, so you can use it in inference mode
    # TODO load HF's tokenizer_config.json and adjust settings
    tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path=model)
    label_list = ["start_token", "end_token"]
    metric = "squad"
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=256,
        label_list=label_list,
        metric=metric,
        data_dir="../data/squad20",
    )
    processor.save(save_dir)
def evaluate_question_answering():
    ##########################
    ########## Settings
    ##########################
    device, n_gpu = initialize_device_settings(use_cuda=True)
    lang_model = "deepset/roberta-base-squad2"
    do_lower_case = True
    data_dir = Path("../data/squad20")
    evaluation_filename = "dev-v2.0.json"
    batch_size = 50
    no_ans_boost = 0
    accuracy_at = 3  # accuracy at n is useful for answers inside long documents

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=256,
        label_list=["start_token", "end_token"],
        metric="squad",
        train_filename=None,
        dev_filename=None,
        dev_split=0,
        test_filename=evaluation_filename,
        data_dir=data_dir,
        doc_stride=128,
    )

    # 3. Create a DataSilo that loads the dataset, provides DataLoaders for it
    #    and calculates a few descriptive statistics of our datasets
    data_silo = DataSilo(processor=processor, batch_size=batch_size)

    # 4. Create an Evaluator
    evaluator = Evaluator(data_loader=data_silo.get_data_loader("test"),
                          tasks=data_silo.processor.tasks,
                          device=device)

    # 5. Load model
    model = AdaptiveModel.convert_from_transformers(lang_model, device=device,
                                                    task_type="question_answering")
    # use "load" if you want to use a local model that was trained with FARM
    # model = AdaptiveModel.load(lang_model, device=device)
    model.prediction_heads[0].no_ans_boost = no_ans_boost
    model.prediction_heads[0].n_best = accuracy_at
    model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)

    # 6. Run the Evaluator
    results = evaluator.eval(model)
    f1_score = results[0]["f1"]
    em_score = results[0]["EM"]
    tnacc = results[0]["top_n_accuracy"]
    print("F1-Score:", f1_score)
    print("Exact Match Score:", em_score)
    print(f"top_{accuracy_at}_accuracy:", tnacc)
def test_qa(caplog):
    caplog.set_level(logging.CRITICAL)
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    batch_size = 2
    n_epochs = 1
    evaluate_every = 4
    base_LM_model = "bert-base-cased"

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=base_LM_model, do_lower_case=False
    )
    label_list = ["start_token", "end_token"]
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=20,
        doc_stride=10,
        max_query_length=6,
        train_filename="train-sample.json",
        dev_filename="dev-sample.json",
        test_filename=None,
        data_dir="samples/qa",
        label_list=label_list,
        metric="squad",
    )
    data_silo = DataSilo(processor=processor, batch_size=batch_size)
    language_model = LanguageModel.load(base_LM_model)
    prediction_head = QuestionAnsweringHead(layer_dims=[768, len(label_list)])
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=2e-5,
        # optimizer_opts={'name': 'AdamW', 'lr': 2E-05},
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device,
    )
    trainer = Trainer(
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
    )
    model = trainer.train(model)
    save_dir = "testsave/qa"
    model.save(save_dir)
    processor.save(save_dir)
def distilbert_squad(request):
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=False)
    batch_size = 2
    n_epochs = 1
    evaluate_every = 4
    base_LM_model = "distilbert-base-uncased"

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=base_LM_model,
        do_lower_case=True,
        use_fast=True,  # TODO parametrize this to test slow as well
    )
    label_list = ["start_token", "end_token"]
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=20,
        doc_stride=10,
        max_query_length=6,
        train_filename="train-sample.json",
        dev_filename="dev-sample.json",
        test_filename=None,
        data_dir=Path("samples/qa"),
        label_list=label_list,
        metric="squad",
    )
    data_silo = DataSilo(processor=processor, batch_size=batch_size, max_processes=1)
    language_model = LanguageModel.load(base_LM_model)
    prediction_head = QuestionAnsweringHead()
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=2e-5,
        # optimizer_opts={'name': 'AdamW', 'lr': 2E-05},
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device,
    )
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
    )
    trainer.train()
    return model, processor
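# Hedged usage sketch (not in the original): `distilbert_squad` reads like a pytest
# fixture, so a test would consume the (model, processor) pair it returns. The
# @pytest.fixture registration and the test body below are assumptions.
def test_distilbert_squad_save_load(distilbert_squad):
    model, processor = distilbert_squad
    save_dir = "testsave/qa_distilbert"  # hypothetical path
    model.save(save_dir)
    processor.save(save_dir)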
def eval_on_file(self, data_dir: str, test_filename: str, device: Optional[str] = None):
    """
    Performs evaluation on a SQuAD-formatted file.
    Returns a dict containing the following metrics:
        - "EM": exact match score
        - "f1": F1-Score
        - "top_n_accuracy": Proportion of predicted answers that overlap with correct answer

    :param data_dir: The directory in which the test set can be found
    :type data_dir: Path or str
    :param test_filename: The name of the file containing the test data in SQuAD format.
    :type test_filename: str
    :param device: The device on which the tensors should be processed.
                   Choose from "cpu" and "cuda" or use the Reader's device by default.
    :type device: str
    """
    if device is None:
        device = self.device
    eval_processor = SquadProcessor(
        tokenizer=self.inferencer.processor.tokenizer,
        max_seq_len=self.inferencer.processor.max_seq_len,
        label_list=self.inferencer.processor.tasks["question_answering"]["label_list"],
        metric=self.inferencer.processor.tasks["question_answering"]["metric"],
        train_filename=None,
        dev_filename=None,
        dev_split=0,
        test_filename=test_filename,
        data_dir=Path(data_dir),
    )

    data_silo = DataSilo(processor=eval_processor,
                         batch_size=self.inferencer.batch_size,
                         distributed=False)
    data_loader = data_silo.get_data_loader("test")

    evaluator = Evaluator(data_loader=data_loader,
                          tasks=eval_processor.tasks,
                          device=device)

    eval_results = evaluator.eval(self.inferencer.model)
    results = {
        "EM": eval_results[0]["EM"],
        "f1": eval_results[0]["f1"],
        "top_n_accuracy": eval_results[0]["top_n_accuracy"],
    }
    return results
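# Hedged usage sketch (not in the original): eval_on_file is a method of haystack's
# FARMReader, so calling it could look like this. The import path matches haystack
# 0.x and may differ in other versions; model name and data paths are illustrative.
from haystack.reader.farm import FARMReader

reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)
metrics = reader.eval_on_file(data_dir="../data/squad20", test_filename="dev-v2.0.json")
print(metrics["EM"], metrics["f1"], metrics["top_n_accuracy"])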
def load(cls, model_name_or_path, batch_size=4, gpu=False, task_type=None,
         return_class_probs=False, strict=True, max_seq_len=256):
    """
    Load an Inferencer incl. all relevant components (model, tokenizer, processor ...)
    either by

    1. specifying a public name from transformers' model hub (https://huggingface.co/models)
    2. or pointing to a local directory it is saved in.

    :param model_name_or_path: Local directory or public name of the model to load.
    :type model_name_or_path: str
    :param batch_size: Number of samples computed once per batch
    :type batch_size: int
    :param gpu: If GPU shall be used
    :type gpu: bool
    :param task_type: Type of task the model should be used for. Currently supporting:
                      "embeddings", "question_answering", "text_classification". More coming soon...
    :type task_type: str
    :param strict: Whether to strictly enforce that the keys loaded from saved model match the ones in
                   the PredictionHead (see torch.nn.module.load_state_dict()).
                   Set to `False` for backwards compatibility with PHs saved with older version of FARM.
    :type strict: bool
    :return: An instance of the Inferencer.
    """
    device, n_gpu = initialize_device_settings(use_cuda=gpu, local_rank=-1, use_amp=None)
    name = os.path.basename(model_name_or_path)

    # a) either from local dir
    if os.path.exists(model_name_or_path):
        model = AdaptiveModel.load(model_name_or_path, device, strict=strict)
        if task_type == "embeddings":
            processor = InferenceProcessor.load_from_dir(model_name_or_path)
        else:
            processor = Processor.load_from_dir(model_name_or_path)

    # b) or from remote transformers model hub
    else:
        logger.info(f"Could not find `{model_name_or_path}` locally. Try to download from model hub ...")
        if not task_type:
            raise ValueError("Please specify the 'task_type' of the model you want to load from transformers. "
                             "Valid options for arg `task_type`: "
                             "'question_answering', 'embeddings', 'text_classification'")

        model = AdaptiveModel.convert_from_transformers(model_name_or_path, device, task_type)
        config = AutoConfig.from_pretrained(model_name_or_path)
        tokenizer = Tokenizer.load(model_name_or_path)

        # TODO infer task_type automatically from config (if possible)
        if task_type == "question_answering":
            processor = SquadProcessor(
                tokenizer=tokenizer,
                max_seq_len=max_seq_len,
                label_list=["start_token", "end_token"],
                metric="squad",
                data_dir=None,
            )
        elif task_type == "embeddings":
            processor = InferenceProcessor(tokenizer=tokenizer, max_seq_len=max_seq_len)
        elif task_type == "text_classification":
            label_list = list(config.id2label[id] for id in range(len(config.id2label)))
            processor = TextClassificationProcessor(
                tokenizer=tokenizer,
                max_seq_len=max_seq_len,
                data_dir=None,
                label_list=label_list,
                label_column_name="label",
                metric="acc",
                quote_char='"',
            )
        # elif task_type == "multilabel-classification":
        #     # label_list = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
        #     label_list = list(config.label2id.keys())
        #     processor = TextClassificationProcessor(tokenizer=tokenizer,
        #                                             max_seq_len=max_seq_len,
        #                                             data_dir=None,
        #                                             label_list=label_list,
        #                                             label_column_name="label",
        #                                             metric="acc",
        #                                             quote_char='"',
        #                                             multilabel=True,
        #                                             )
        elif task_type == "ner":
            label_list = list(config.label2id.keys())
            processor = NERProcessor(tokenizer=tokenizer,
                                     max_seq_len=max_seq_len,
                                     data_dir=None,
                                     metric="seq_f1",
                                     label_list=label_list)
        else:
            raise ValueError(f"`task_type` {task_type} is not supported yet. "
                             f"Valid options for arg `task_type`: 'question_answering', "
                             f"'embeddings', 'text_classification'")

    return cls(
        model,
        processor,
        batch_size=batch_size,
        gpu=gpu,
        name=name,
        return_class_probs=return_class_probs,
    )
def convert_to_onnx(self, output_path, opset_version=11, optimize_for=None):
    """
    Convert a PyTorch AdaptiveModel to ONNX. The conversion is trace-based by
    performing a forward pass on the model with an input batch.

    :param output_path: model dir to write the model and config files
    :type output_path: Path
    :param opset_version: ONNX opset version
    :type opset_version: int
    :param optimize_for: optimize the exported model for a target device. Available options
                         are "gpu_tensor_core" (GPUs with tensor core like V100 or T4),
                         "gpu_without_tensor_core" (most other GPUs), and "cpu".
    :type optimize_for: str
    :return: None
    """
    if type(self.prediction_heads[0]) is not QuestionAnsweringHead:
        raise NotImplementedError

    tokenizer = Tokenizer.load(pretrained_model_name_or_path="deepset/bert-base-cased-squad2")
    label_list = ["start_token", "end_token"]
    metric = "squad"
    max_seq_len = 384
    batch_size = 1
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=max_seq_len,
        label_list=label_list,
        metric=metric,
        train_filename="stub-file",  # the data is loaded from dicts instead of file.
        dev_filename=None,
        test_filename=None,
        data_dir="stub-dir",
    )
    data_silo = DataSilo(processor=processor, batch_size=1, distributed=False,
                         automatic_loading=False)
    sample_dict = [{
        "context": 'The Normans were the people who in the 10th and 11th centuries gave their name to Normandy, '
                   'a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders '
                   'and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear '
                   'fealty to King Charles III of West Francia.',
        "qas": [{
            "question": "In what country is Normandy located?",
            "id": "56ddde6b9a695914005b9628",
            "answers": [{"text": "France", "answer_start": 159}],
            "is_impossible": False,
        }],
    }]

    data_silo._load_data(train_dicts=sample_dict)
    data_loader = data_silo.get_data_loader("train")
    data = next(iter(data_loader))
    data = list(data.values())

    inputs = {
        'input_ids': data[0].to(self.device).reshape(batch_size, max_seq_len),
        'padding_mask': data[1].to(self.device).reshape(batch_size, max_seq_len),
        'segment_ids': data[2].to(self.device).reshape(batch_size, max_seq_len),
    }

    # The method argument passing in torch.onnx.export is different to AdaptiveModel's forward().
    # To resolve that, an ONNXWrapper instance is used.
    model = ONNXWrapper.load_from_adaptive_model(self)

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    with torch.no_grad():
        symbolic_names = {0: 'batch_size', 1: 'max_seq_len'}
        torch.onnx.export(
            model,
            args=tuple(inputs.values()),
            f=output_path / 'model.onnx',
            opset_version=opset_version,
            do_constant_folding=True,
            input_names=['input_ids', 'padding_mask', 'segment_ids'],
            output_names=['logits'],
            dynamic_axes={
                'input_ids': symbolic_names,
                'padding_mask': symbolic_names,
                'segment_ids': symbolic_names,
                'logits': symbolic_names,
            })

    if optimize_for:
        optimize_args = Namespace(disable_attention=False,
                                  disable_bias_gelu=False,
                                  disable_embed_layer_norm=False,
                                  opt_level=99,
                                  disable_skip_layer_norm=False,
                                  disable_bias_skip_layer_norm=False,
                                  hidden_size=768,
                                  verbose=False,
                                  model_type='bert',
                                  num_heads=12,
                                  # point the optimizer at the file just exported
                                  # (the original hardcoded 'onnx-export/model.onnx' here)
                                  input=str(output_path / 'model.onnx'),
                                  output=str(output_path / 'model.onnx'))

        if optimize_for == "gpu_tensor_core":
            optimize_args.float16 = True
            optimize_args.input_int32 = True
        elif optimize_for == "gpu_without_tensor_core":
            optimize_args.float16 = False
            optimize_args.input_int32 = True
        elif optimize_for == "cpu":
            optimize_args.float16 = False
            optimize_args.input_int32 = False
        else:
            raise NotImplementedError(
                f"ONNXRuntime model optimization is not available for {optimize_for}. Choose "
                f"one of 'gpu_tensor_core'(V100 or T4), 'gpu_without_tensor_core' or 'cpu'.")

        optimize_onnx_model(optimize_args)
    else:
        logger.info("Exporting unoptimized ONNX model. To enable optimization, supply "
                    "'optimize_for' parameter with the target device.")

    # PredictionHead contains functionalities like logits_to_preds() that would still be needed
    # for Inference with ONNX models. Only the config of the PredictionHead is stored.
    for i, ph in enumerate(self.prediction_heads):
        ph.save_config(output_path, i)

    processor.save(output_path)

    onnx_model_config = {
        "onnx_opset_version": opset_version,
        "language": self.get_language(),
    }
    with open(output_path / "model_config.json", "w") as f:
        json.dump(onnx_model_config, f)

    logger.info(f"Model exported at path {output_path}")
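# Hedged usage sketch (not in the original): sanity-checking the exported
# model.onnx with onnxruntime directly. The input names match the export above;
# the output dir and the all-zeros inputs are placeholders, not meaningful tokens.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("onnx-export/model.onnx")
dummy = np.zeros((1, 384), dtype=np.int64)
logits = sess.run(["logits"],
                  {"input_ids": dummy, "padding_mask": dummy, "segment_ids": dummy})
print(logits[0].shape)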
def xlmr_qa_demo():
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

    ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
    ml_logger.init_experiment(experiment_name="Public_FARM", run_name="run_xmlr_qa")

    #########################
    ######## Settings
    ########################
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    batch_size = 3
    grad_acc_steps = 8
    n_epochs = 2
    evaluate_every = 200
    base_LM_model = "xlm-roberta-large"

    data_dir = Path("../data/squad20")
    train_filename = Path("train-v2.0.json")
    dev_filename = Path("dev-v2.0.json")

    save_dir = Path("../saved_models/xlmr-large-qa")

    inference_file = Path("../data/MLQA_V1/dev/dev-context-de-question-de.json")
    predictions_file = save_dir / "predictions.json"
    full_predictions_file = save_dir / "full_predictions.json"
    max_processes_for_inference = 8
    train = True
    inference = False

    if train:
        # 1. Create a tokenizer
        tokenizer = Tokenizer.load(pretrained_model_name_or_path=base_LM_model)

        # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
        label_list = ["start_token", "end_token"]
        metric = "squad"
        processor = SquadProcessor(
            tokenizer=tokenizer,
            max_seq_len=384,
            label_list=label_list,
            metric=metric,
            train_filename=train_filename,
            dev_filename=dev_filename,
            test_filename=None,
            data_dir=data_dir,
            dev_split=0.0,
        )

        # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
        #    and calculates a few descriptive statistics of our datasets
        data_silo = DataSilo(processor=processor, batch_size=batch_size,
                             distributed=False, max_processes=1)

        # 4. Create an AdaptiveModel
        # a) which consists of a pretrained language model as a basis
        language_model = LanguageModel.load(base_LM_model, n_added_tokens=3)
        # b) and a prediction head on top that is suited for our task => Question Answering
        prediction_head = QuestionAnsweringHead()

        model = AdaptiveModel(
            language_model=language_model,
            prediction_heads=[prediction_head],
            embeds_dropout_prob=0.1,
            lm_output_types=["per_token"],
            device=device,
        )

        # 5. Create an optimizer
        model, optimizer, lr_schedule = initialize_optimizer(
            model=model,
            learning_rate=3e-5,
            schedule_opts={"name": "LinearWarmup", "warmup_proportion": 0.2},
            n_batches=len(data_silo.loaders["train"]),
            n_epochs=n_epochs,
            grad_acc_steps=grad_acc_steps,
            device=device,
        )

        # 6. Feed everything to the Trainer, which takes care of growing our model and evaluates it from time to time
        trainer = Trainer(
            optimizer=optimizer,
            data_silo=data_silo,
            epochs=n_epochs,
            n_gpu=n_gpu,
            lr_schedule=lr_schedule,
            evaluate_every=evaluate_every,
            device=device,
        )

        # 7. Let it grow! Watch the tracked metrics live on the public mlflow server: https://public-mlflow.deepset.ai
        model = trainer.train(model)

        # 8. Hooray! You have a model. Store it:
        model.save(save_dir)
        processor.save(save_dir)

    if inference:
        model = Inferencer.load(save_dir, batch_size=32, gpu=True)
        full_result = model.inference_from_file(
            file=inference_file,
            max_processes=max_processes_for_inference,
        )

        for x in full_result:
            print(x)
            print()

        result = {r["id"]: r["preds"][0][0] for r in full_result}
        full_result = {r["id"]: r["preds"] for r in full_result}

        json.dump(result, open(predictions_file, "w"), indent=4, ensure_ascii=False)
        json.dump(full_result, open(full_predictions_file, "w"), indent=4, ensure_ascii=False)
def train_evaluation_single(seed=42):
    ##########################
    ########## Settings
    ##########################
    set_all_seeds(seed=seed)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    # GPU utilization on 4x V100: batch size 40*4, 14.3/16GB on master, 12.6/16 on others
    n_gpu_factor = max(1, n_gpu)  # assumed fix: this factor was undefined in the original
    batch_size = 40 * n_gpu_factor
    n_epochs = 2
    evaluate_every = 2000000  # disabling dev eval
    lang_model = "roberta-base"
    do_lower_case = False  # roberta is a cased model
    test_assertions = False
    train_filename = "train-v2.0.json"
    dev_filename = "dev-v2.0.json"

    # Load model and train
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=do_lower_case)
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=256,
        label_list=["start_token", "end_token"],
        metric="squad",
        train_filename=train_filename,
        dev_filename=dev_filename,
        test_filename=None,
        data_dir=Path("testsave/data/squad20"),
    )
    data_silo = DataSilo(processor=processor, batch_size=batch_size)
    language_model = LanguageModel.load(lang_model)
    prediction_head = QuestionAnsweringHead(n_best=5, n_best_per_sample=1)
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=3e-5,
        schedule_opts={"name": "LinearWarmup", "warmup_proportion": 0.2},
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device)
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
    )

    starttime = time()
    trainer.train()
    elapsed = time() - starttime

    save_dir = Path("testsave/roberta-qa-dev")
    model.save(save_dir)
    processor.save(save_dir)

    # Create Evaluator
    evaluator = Evaluator(data_loader=data_silo.get_data_loader("dev"),
                          tasks=data_silo.processor.tasks,
                          device=device)

    results = evaluator.eval(model)
    f1_score = results[0]["f1"] * 100
    em_score = results[0]["EM"] * 100
    tnacc = results[0]["top_n_accuracy"] * 100
    print(results)
    print(elapsed)

    gold_f1 = 82.155
    gold_EM = 78.6575  # 77.714
    gold_tnrecall = 97.3721
    gold_elapsed = 1135
    if test_assertions:
        np.testing.assert_allclose(
            f1_score, gold_f1, rtol=0.01,
            err_msg=f"FARM Training changed for f1 score by: {f1_score - gold_f1}")
        np.testing.assert_allclose(
            em_score, gold_EM, rtol=0.01,
            err_msg=f"FARM Training changed for EM by: {em_score - gold_EM}")
        np.testing.assert_allclose(
            tnacc, gold_tnrecall, rtol=0.01,
            err_msg=f"FARM Training changed for top 5 accuracy by: {tnacc - gold_tnrecall}")
        np.testing.assert_allclose(
            elapsed, gold_elapsed, rtol=0.1,
            err_msg=f"FARM Training speed changed significantly by: {elapsed - gold_elapsed} seconds")

    # `error_messages` is assumed to be a module-level list in the benchmark script
    if not np.allclose(f1_score, gold_f1, rtol=0.01):
        error_messages.append(
            f"FARM Training changed for f1 score by: {round(f1_score - gold_f1, 4)}")
    if not np.allclose(em_score, gold_EM, rtol=0.01):
        error_messages.append(
            f"FARM Training changed for EM by: {round(em_score - gold_EM, 4)}")
    if not np.allclose(tnacc, gold_tnrecall, rtol=0.01):
        error_messages.append(
            f"FARM Training changed for top 5 accuracy by: {round(tnacc - gold_tnrecall, 4)}")
    if not np.allclose(elapsed, gold_elapsed, rtol=0.1):
        error_messages.append(
            f"FARM Training speed changed significantly by: {round(elapsed - gold_elapsed, 4)} seconds")

    benchmark_result = [{
        "run": "train evaluation",
        "f1_change": round(f1_score - gold_f1, 4),
        "em_change": round(em_score - gold_EM, 4),
        "tnacc_change": round(tnacc - gold_tnrecall, 4),
        "elapsed_change": round(elapsed - gold_elapsed, 4),
        "f1": f1_score,
        "em": em_score,
        "tnacc": round(tnacc, 4),
        "elapsed": elapsed,
        "f1_gold": gold_f1,
        "em_gold": gold_EM,
        "tnacc_gold": gold_tnrecall,
        "elapsed_gold": gold_elapsed,
    }]
    logger.info("\n\n" + pformat(benchmark_result) + "\n")
    return benchmark_result
def train_evaluation_single(seed=42):
    ##########################
    ########## Settings
    ##########################
    set_all_seeds(seed=seed)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    batch_size = 32 * 4  # 4x V100
    n_epochs = 2
    evaluate_every = 2000000  # disabling dev eval
    lang_model = "roberta-base"
    do_lower_case = False  # roberta is a cased model
    train_filename = "train-v2.0.json"
    dev_filename = "dev-v2.0.json"

    # Load model and train
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=do_lower_case)
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=256,
        label_list=["start_token", "end_token"],
        metric="squad",
        train_filename=train_filename,
        dev_filename=dev_filename,
        test_filename=None,
        data_dir=Path("testsave/data/squad20"),
    )
    data_silo = DataSilo(processor=processor, batch_size=batch_size, distributed=False)
    language_model = LanguageModel.load(lang_model)
    prediction_head = QuestionAnsweringHead()
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=3e-5,
        schedule_opts={"name": "LinearWarmup", "warmup_proportion": 0.2},
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device)
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
    )

    starttime = time()
    trainer.train()
    elapsed = time() - starttime

    save_dir = Path("testsave/roberta-qa-dev")
    model.save(save_dir)
    processor.save(save_dir)

    # Create Evaluator
    evaluator = Evaluator(data_loader=data_silo.get_data_loader("dev"),
                          tasks=data_silo.processor.tasks,
                          device=device)

    results = evaluator.eval(model)
    f1_score = results[0]["f1"] * 100
    em_score = results[0]["EM"] * 100
    tnrecall = results[0]["top_n_recall"] * 100
    print(results)
    print(elapsed)

    gold_f1 = 82.155
    gold_EM = 77.714
    gold_tnrecall = 97.3721
    gold_elapsed = 1286.30  # was commented out, but is required by the assertion below

    np.testing.assert_allclose(
        f1_score, gold_f1, rtol=0.01,
        err_msg=f"FARM Training changed for f1 score by: {f1_score - gold_f1}")
    np.testing.assert_allclose(
        em_score, gold_EM, rtol=0.01,
        err_msg=f"FARM Training changed for EM by: {em_score - gold_EM}")
    np.testing.assert_allclose(
        tnrecall, gold_tnrecall, rtol=0.01,
        err_msg=f"FARM Training changed for top n recall by: {tnrecall - gold_tnrecall}")
    np.testing.assert_allclose(
        elapsed, gold_elapsed, rtol=0.1,
        err_msg=f"FARM Eval speed changed significantly by: {elapsed - gold_elapsed} seconds")
def test_dataset_from_dicts_qa_labelconversion(caplog=None):
    if caplog:
        caplog.set_level(logging.CRITICAL)

    models = [
        "deepset/roberta-base-squad2",
        "deepset/bert-base-cased-squad2",
        "deepset/xlm-roberta-large-squad2",
        "deepset/minilm-uncased-squad2",
        "deepset/electra-base-squad2",
    ]
    sample_types = ["answer-wrong", "answer-offset-wrong", "noanswer", "vanilla"]

    for model in models:
        tokenizer = Tokenizer.load(pretrained_model_name_or_path=model, use_fast=True)
        processor = SquadProcessor(tokenizer, max_seq_len=256, data_dir=None)

        for sample_type in sample_types:
            dicts = processor.file_to_dicts(f"samples/qa/{sample_type}.json")
            dataset, tensor_names, problematic_sample_ids = processor.dataset_from_dicts(
                dicts, indices=[1], return_baskets=False)

            if sample_type == "answer-wrong" or sample_type == "answer-offset-wrong":
                assert len(problematic_sample_ids) == 1, \
                    f"Processing labels for {model} has changed."

            if sample_type == "noanswer":
                labels = dataset.tensors[tensor_names.index("labels")].numpy()
                assert list(labels[0, 0, :]) == [0, 0], \
                    f"Processing labels for {model} has changed."
                assert list(labels[0, 1, :]) == [-1, -1], \
                    f"Processing labels for {model} has changed."

            if sample_type == "vanilla":
                labels = dataset.tensors[tensor_names.index("labels")].numpy()
                # roberta
                if model == "deepset/roberta-base-squad2":
                    assert list(labels[0, 0, :]) == [13, 13], \
                        f"Processing labels for {model} has changed."
                    assert list(labels[0, 1, :]) == [13, 14], \
                        f"Processing labels for {model} has changed."
                # bert, minilm, electra
                if model in ("deepset/bert-base-cased-squad2",
                             "deepset/minilm-uncased-squad2",
                             "deepset/electra-base-squad2"):
                    assert list(labels[0, 0, :]) == [11, 11], \
                        f"Processing labels for {model} has changed."
                # xlm-roberta
                if model == "deepset/xlm-roberta-large-squad2":
                    assert list(labels[0, 0, :]) == [12, 12], \
                        f"Processing labels for {model} has changed."
device, n_gpu = initialize_device_settings(use_cuda=True)
batch_size = 24
n_epochs = 2
evaluate_every = 500
base_LM_model = "bert-base-cased"
train_filename = "train-v2.0.json"
dev_filename = "dev-v2.0.json"

# 1. Create a tokenizer
tokenizer = BertTokenizer.from_pretrained(
    pretrained_model_name_or_path=base_LM_model, do_lower_case=False)

# 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
processor = SquadProcessor(
    tokenizer=tokenizer,
    max_seq_len=256,
    train_filename=train_filename,
    dev_filename=dev_filename,
    test_filename=None,
    data_dir="../data/squad20",
)

# 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
#    and calculates a few descriptive statistics of our datasets
data_silo = DataSilo(processor=processor, batch_size=batch_size, distributed=False)

# 4. Create an AdaptiveModel
# a) which consists of a pretrained language model as a basis
language_model = Bert.load(base_LM_model)
# b) and a prediction head on top that is suited for our task => Question Answering
prediction_head = QuestionAnsweringHead(
    layer_dims=[768, len(processor.label_list)])
def train(
    self,
    data_dir: str,
    train_filename: str,
    dev_filename: Optional[str] = None,
    test_filename: Optional[str] = None,
    use_gpu: Optional[bool] = None,
    batch_size: int = 10,
    n_epochs: int = 2,
    learning_rate: float = 1e-5,
    max_seq_len: Optional[int] = None,
    warmup_proportion: float = 0.2,
    dev_split: float = 0,
    evaluate_every: int = 300,
    save_dir: Optional[str] = None,
    num_processes: Optional[int] = None,
    use_amp: str = None,
):
    """
    Fine-tune a model on a QA dataset. Options:

    - Take a plain language model (e.g. `bert-base-cased`) and train it for QA (e.g. on SQuAD data)
    - Take a QA model (e.g. `deepset/bert-base-cased-squad2`) and fine-tune it for your domain
      (e.g. using your labels collected via the haystack annotation tool)

    :param data_dir: Path to directory containing your training data in SQuAD style
    :param train_filename: Filename of training data
    :param dev_filename: Filename of dev / eval data
    :param test_filename: Filename of test data
    :param dev_split: Instead of specifying a dev_filename, you can also specify a ratio (e.g. 0.1)
                      here that gets split off from training data for eval.
    :param use_gpu: Whether to use GPU (if available)
    :param batch_size: Number of samples the model receives in one batch for training
    :param n_epochs: Number of iterations on the whole training data set
    :param learning_rate: Learning rate of the optimizer
    :param max_seq_len: Maximum text length (in tokens). Everything longer gets cut down.
    :param warmup_proportion: Proportion of training steps until maximum learning rate is reached.
                              Until that point LR is increasing linearly. After that it's decreasing again linearly.
                              Options for different schedules are available in FARM.
    :param evaluate_every: Evaluate the model every X steps on the hold-out eval dataset
    :param save_dir: Path to store the final model
    :param num_processes: The number of processes for `multiprocessing.Pool` during preprocessing.
                          Set to value of 1 to disable multiprocessing. When set to 1, you cannot split away
                          a dev set from train set. Set to None to use all CPU cores minus one.
    :param use_amp: Optimization level of NVIDIA's automatic mixed precision (AMP). The higher the level,
                    the faster the model. Available options:
                    None (Don't use AMP)
                    "O0" (Normal FP32 training)
                    "O1" (Mixed Precision => Recommended)
                    "O2" (Almost FP16)
                    "O3" (Pure FP16).
                    See details on: https://nvidia.github.io/apex/amp.html
    :return: None
    """
    if dev_filename:
        dev_split = 0

    if num_processes is None:
        num_processes = multiprocessing.cpu_count() - 1 or 1

    set_all_seeds(seed=42)

    # For these variables, by default, we use the value set when initializing the FARMReader.
    # These can also be manually set when train() is called if you want a different value at train vs inference
    if use_gpu is None:
        use_gpu = self.use_gpu
    if max_seq_len is None:
        max_seq_len = self.max_seq_len

    device, n_gpu = initialize_device_settings(use_cuda=use_gpu, use_amp=use_amp)

    if not save_dir:
        save_dir = f"../../saved_models/{self.inferencer.model.language_model.name}"

    # 1. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    label_list = ["start_token", "end_token"]
    metric = "squad"
    processor = SquadProcessor(
        tokenizer=self.inferencer.processor.tokenizer,
        max_seq_len=max_seq_len,
        label_list=label_list,
        metric=metric,
        train_filename=train_filename,
        dev_filename=dev_filename,
        dev_split=dev_split,
        test_filename=test_filename,
        data_dir=Path(data_dir),
    )

    # 2. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    #    and calculates a few descriptive statistics of our datasets
    data_silo = DataSilo(processor=processor, batch_size=batch_size,
                         distributed=False, max_processes=num_processes)

    # Quick-fix until this is fixed upstream in FARM:
    # We must avoid applying DataParallel twice (once when loading the inferencer,
    # once when calling initialize_optimizer)
    self.inferencer.model.save("tmp_model")
    model = BaseAdaptiveModel.load(load_dir="tmp_model", device=device, strict=True)
    shutil.rmtree('tmp_model')

    # 3. Create an optimizer and pass the already initialized model
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        # model=self.inferencer.model,
        learning_rate=learning_rate,
        schedule_opts={"name": "LinearWarmup", "warmup_proportion": warmup_proportion},
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device,
        use_amp=use_amp,
    )
    # 4. Feed everything to the Trainer, which takes care of growing our model and evaluates it from time to time
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
        use_amp=use_amp,
    )

    # 5. Let it grow!
    self.inferencer.model = trainer.train()
    self.save(Path(save_dir))
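# Hedged usage sketch (not in the original): fine-tuning a hub QA model on a
# custom SQuAD-style file via the train() method above. The import path matches
# haystack 0.x; the data paths and save_dir are illustrative assumptions.
from haystack.reader.farm import FARMReader

reader = FARMReader(model_name_or_path="deepset/bert-base-cased-squad2")
reader.train(
    data_dir="data/my_squad_data",   # hypothetical directory
    train_filename="answers.json",   # hypothetical SQuAD-style file
    use_gpu=True,
    n_epochs=1,
    save_dir="my_model",
)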
def load(cls, model_name_or_path, batch_size=4, gpu=False, task_type=None,
         return_class_probs=False, strict=True, max_seq_len=256, doc_stride=128,
         extraction_layer=None, extraction_strategy=None, s3e_stats=None,
         num_processes=None, disable_tqdm=False):
    """
    Load an Inferencer incl. all relevant components (model, tokenizer, processor ...)
    either by

    1. specifying a public name from transformers' model hub (https://huggingface.co/models)
    2. or pointing to a local directory it is saved in.

    :param model_name_or_path: Local directory or public name of the model to load.
    :type model_name_or_path: str
    :param batch_size: Number of samples computed once per batch
    :type batch_size: int
    :param gpu: If GPU shall be used
    :type gpu: bool
    :param task_type: Type of task the model should be used for. Currently supporting:
                      "embeddings", "question_answering", "text_classification", "ner". More coming soon...
    :type task_type: str
    :param strict: Whether to strictly enforce that the keys loaded from saved model match the ones in
                   the PredictionHead (see torch.nn.module.load_state_dict()).
                   Set to `False` for backwards compatibility with PHs saved with older version of FARM.
    :type strict: bool
    :param max_seq_len: Maximum length of one text sample
    :type max_seq_len: int
    :param doc_stride: Only QA: When input text is longer than max_seq_len it gets split into parts,
                       strided by doc_stride
    :type doc_stride: int
    :param extraction_strategy: Strategy to extract vectors. Choices: 'cls_token' (sentence vector),
                                'reduce_mean' (sentence vector), 'reduce_max' (sentence vector),
                                'per_token' (individual token vectors)
    :type extraction_strategy: str
    :param extraction_layer: Number of layer from which the embeddings shall be extracted.
                             Default: -1 (very last layer).
    :type extraction_layer: int
    :param s3e_stats: Stats of a fitted S3E model as returned by `fit_s3e_on_corpus()`
                      (only needed for task_type="embeddings" and extraction_strategy="s3e")
    :type s3e_stats: dict
    :param num_processes: The number of processes for `multiprocessing.Pool`. Set to value of 0 to
                          disable multiprocessing. Set to None to let Inferencer use all CPU cores.
                          If you want to debug the Language Model, you might need to disable multiprocessing!
    :type num_processes: int
    :param disable_tqdm: Whether to disable tqdm logging (can get very verbose in multiprocessing)
    :type disable_tqdm: bool
    :return: An instance of the Inferencer.
    """
    device, n_gpu = initialize_device_settings(use_cuda=gpu, local_rank=-1, use_amp=None)
    name = os.path.basename(model_name_or_path)

    # a) either from local dir
    if os.path.exists(model_name_or_path):
        model = BaseAdaptiveModel.load(load_dir=model_name_or_path, device=device, strict=strict)
        if task_type == "embeddings":
            processor = InferenceProcessor.load_from_dir(model_name_or_path)
        else:
            processor = Processor.load_from_dir(model_name_or_path)

        # override processor attributes loaded from config file with inferencer params
        processor.max_seq_len = max_seq_len
        if hasattr(processor, "doc_stride"):
            processor.doc_stride = doc_stride

    # b) or from remote transformers model hub
    else:
        logger.info(f"Could not find `{model_name_or_path}` locally. Try to download from model hub ...")
        if not task_type:
            raise ValueError("Please specify the 'task_type' of the model you want to load from transformers. "
                             "Valid options for arg `task_type`: "
                             "'question_answering', 'embeddings', 'text_classification', 'ner'")

        model = AdaptiveModel.convert_from_transformers(model_name_or_path, device, task_type)
        config = AutoConfig.from_pretrained(model_name_or_path)
        tokenizer = Tokenizer.load(model_name_or_path)

        # TODO infer task_type automatically from config (if possible)
        if task_type == "question_answering":
            processor = SquadProcessor(
                tokenizer=tokenizer,
                max_seq_len=max_seq_len,
                label_list=["start_token", "end_token"],
                metric="squad",
                data_dir="data",
                doc_stride=doc_stride)
        elif task_type == "embeddings":
            processor = InferenceProcessor(tokenizer=tokenizer, max_seq_len=max_seq_len)
        elif task_type == "text_classification":
            label_list = list(config.id2label[id] for id in range(len(config.id2label)))
            processor = TextClassificationProcessor(
                tokenizer=tokenizer,
                max_seq_len=max_seq_len,
                data_dir="data",
                label_list=label_list,
                label_column_name="label",
                metric="acc",
                quote_char='"',
            )
        elif task_type == "ner":
            label_list = list(config.label2id.keys())
            processor = NERProcessor(tokenizer=tokenizer,
                                     max_seq_len=max_seq_len,
                                     data_dir="data",
                                     metric="seq_f1",
                                     label_list=label_list)
        else:
            raise ValueError(f"`task_type` {task_type} is not supported yet. "
                             f"Valid options for arg `task_type`: 'question_answering', "
                             f"'embeddings', 'text_classification', 'ner'")

    return cls(model,
               processor,
               task_type=task_type,
               batch_size=batch_size,
               gpu=gpu,
               name=name,
               return_class_probs=return_class_probs,
               extraction_strategy=extraction_strategy,
               extraction_layer=extraction_layer,
               s3e_stats=s3e_stats,
               num_processes=num_processes,
               disable_tqdm=disable_tqdm)
def main(args):
    print(f"[INFO] PyTorch Version: {torch.__version__}")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("[INFO] Devices available: {}".format(device))

    checkpoint_path = Path(args.ckpt_path) / args.run_name

    ml_logger = MLFlowLogger(tracking_uri=args.tracking_uri)
    ml_logger.init_experiment(experiment_name=args.experiment_name, run_name=args.run_name)

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=args.pretrained_model_name_or_path,
        do_lower_case=False)

    # Processor
    if args.task_name == "text_classification":
        processor = TextClassificationProcessor(
            tokenizer=tokenizer,
            train_filename=args.train_filename,
            dev_filename=None,
            test_filename=args.test_filename,
            header=0,
            max_seq_len=args.max_seq_len,
            data_dir=args.data_dir,
            label_list=args.label_list,
            metric=args.metric,
            label_column_name=args.label_column_name,
            text_column_name=args.text_column_name)
    elif args.task_name == "question_answering":
        processor = SquadProcessor(
            tokenizer=tokenizer,
            train_filename=args.train_filename,
            dev_filename=args.test_filename,
            test_filename=args.test_filename,
            max_seq_len=args.max_seq_len,
            data_dir=args.data_dir,
            label_list=args.label_list,
            metric=args.metric,
            max_query_length=64,
            doc_stride=128,
            max_answers=1)
    else:
        raise ValueError("task name error")
    processor.save(checkpoint_path)

    # DataSilo
    data_silo = DataSilo(processor=processor,
                         batch_size=args.batch_size,
                         eval_batch_size=args.eval_batch_size,
                         caching=True,
                         cache_path=checkpoint_path)

    # LanguageModel: Build pretrained language model
    language_model = LanguageModel.load(args.pretrained_model_name_or_path, language="korean")

    # PredictionHead: Build predictor layer
    if args.task_name == "text_classification":
        # If you do classification on imbalanced classes, consider using class weights.
        # They change the loss function to down-weight frequent classes.
        prediction_head = TextClassificationHead(
            num_labels=len(args.label_list),
            class_weights=data_silo.calculate_class_weights(task_name=args.task_name))
    elif args.task_name == "question_answering":
        prediction_head = QuestionAnsweringHead(
            layer_dims=[768, 2],
            task_name=args.task_name,
        )
    else:
        raise ValueError("task name error")

    # AdaptiveModel: Combine all
    if args.task_name == "text_classification":
        lm_output_types = ["per_sequence"]
    elif args.task_name == "question_answering":
        lm_output_types = ["per_token"]
    else:
        raise ValueError("task name error")

    model = AdaptiveModel(language_model=language_model,
                          prediction_heads=[prediction_head],
                          embeds_dropout_prob=args.embeds_dropout_prob,
                          lm_output_types=lm_output_types,
                          device=device)

    # Initialize Optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        device=device,
        learning_rate=args.learning_rate,
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=args.n_epochs)

    # EarlyStopping
    earlymetric = "f1" if args.task_name == "question_answering" else "acc"
    mode = "max" if args.task_name in ["text_classification", "question_answering"] else "min"
    earlystop = EarlyStopping(save_dir=checkpoint_path,
                              metric=earlymetric,
                              mode=mode,
                              patience=5)

    # Trainer
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        lr_schedule=lr_schedule,
        data_silo=data_silo,
        early_stopping=earlystop,
        evaluate_every=args.evaluate_every,
        checkpoints_to_keep=args.checkpoints_to_keep,
        checkpoint_root_dir=checkpoint_path,
        checkpoint_every=args.checkpoint_every,
        epochs=args.n_epochs,
        n_gpu=args.n_gpu,
        device=device,
    )

    # now train!
    model = trainer.train()
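# Hedged usage sketch (not in the original): main() expects an argparse-style
# namespace, so a minimal runner could look like this. The flag names mirror the
# attributes accessed above; all defaults are illustrative assumptions.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--task_name", default="question_answering")
    parser.add_argument("--pretrained_model_name_or_path", default="bert-base-multilingual-cased")
    parser.add_argument("--data_dir", default="data")
    parser.add_argument("--train_filename", default="train.json")
    parser.add_argument("--test_filename", default="test.json")
    parser.add_argument("--label_list", nargs="+", default=["start_token", "end_token"])
    parser.add_argument("--metric", default="squad")
    parser.add_argument("--label_column_name", default="label")
    parser.add_argument("--text_column_name", default="text")
    parser.add_argument("--max_seq_len", type=int, default=384)
    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--eval_batch_size", type=int, default=32)
    parser.add_argument("--embeds_dropout_prob", type=float, default=0.1)
    parser.add_argument("--learning_rate", type=float, default=3e-5)
    parser.add_argument("--n_epochs", type=int, default=2)
    parser.add_argument("--n_gpu", type=int, default=1)
    parser.add_argument("--evaluate_every", type=int, default=500)
    parser.add_argument("--checkpoint_every", type=int, default=500)
    parser.add_argument("--checkpoints_to_keep", type=int, default=3)
    parser.add_argument("--ckpt_path", default="checkpoints")
    parser.add_argument("--run_name", default="qa-run")
    parser.add_argument("--experiment_name", default="qa-experiment")
    parser.add_argument("--tracking_uri", default="http://localhost:5000")
    main(parser.parse_args())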
from farm.modeling.tokenization import BertTokenizer
from farm.modeling.prediction_head import QuestionAnsweringHead
from farm.data_handler.processor import SquadProcessor
from farm.utils import initialize_device_settings
# these two imports were missing but are required below
from farm.modeling.language_model import Bert
from farm.modeling.adaptive_model import AdaptiveModel

####################### loads a SQUAD finetuned model
# saves it as a FARM adaptive model
device, n_gpu = initialize_device_settings(use_cuda=True)
model = "bert-large-uncased-whole-word-masking-finetuned-squad"
save_dir = "saved_models/FARM-bert-large-uncased-whole-word-masking-finetuned-squad"

lm = Bert.load(model)
ph = QuestionAnsweringHead.load(model)
am = AdaptiveModel(language_model=lm,
                   prediction_heads=[ph],
                   embeds_dropout_prob=0.1,
                   lm_output_types="per_token",
                   device=device)
am.save(save_dir)

# saves the processor associated with it, so you can use it in inference mode
# TODO load HF's tokenizer_config.json and adjust settings
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path=model)
label_list = ["start_token", "end_token"]
metric = "squad"
processor = SquadProcessor(
    tokenizer=tokenizer,
    max_seq_len=256,
    label_list=label_list,
    metric=metric,
    data_dir="../data/squad20",
)
processor.save(save_dir)
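# Hedged usage sketch (not in the original): the directory written above contains
# both model and processor, so FARM's Inferencer can load it directly (branch a)
# of Inferencer.load shown earlier). The input dict format follows the other QA
# snippets in this file.
from farm.infer import Inferencer

qa_model = Inferencer.load(save_dir, batch_size=8, gpu=True)
qa_input = [{
    "qas": ["Who gave their name to Normandy?"],
    "context": "The Normans were the people who gave their name to Normandy, a region in France.",
}]
print(qa_model.inference_from_dicts(dicts=qa_input))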
def question_answering_confidence():
    ##########################
    ########## Logging
    ##########################
    logger = logging.getLogger(__name__)
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO)
    # reduce verbosity from transformers library
    logging.getLogger('transformers').setLevel(logging.WARNING)

    ##########################
    ########## Settings
    ##########################
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=True)

    lang_model = "deepset/roberta-base-squad2"
    do_lower_case = False
    batch_size = 80

    data_dir = Path("../data/squad20")
    # We use the same file for dev and test set only for demo purposes
    dev_filename = "dev-v2.0.json"
    test_filename = "dev-v2.0.json"
    accuracy_at = 3  # accuracy at n is useful for answers inside long documents

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=lang_model,
        do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=384,
        label_list=["start_token", "end_token"],
        metric="squad",
        train_filename=None,
        dev_filename=dev_filename,
        test_filename=test_filename,
        data_dir=data_dir,
        doc_stride=192,
    )

    # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    #    and calculates a few descriptive statistics of our datasets
    data_silo = DataSilo(
        processor=processor,
        batch_size=batch_size)

    # 4. Load pre-trained question-answering model
    model = AdaptiveModel.convert_from_transformers(lang_model, device=device,
                                                    task_type="question_answering")
    model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)
    # Number of predictions the model will make per Question.
    # The multiple predictions are used for evaluating top n recall.
    model.prediction_heads[0].n_best = accuracy_at

    # 5. The calibration of model confidence scores sets one parameter, which is called temperature
    #    and can be accessed through the prediction_head.
    #    This temperature is applied to each logit in the forward pass, where each logit is divided
    #    by the temperature. A softmax function is applied to the logits afterward to get confidence
    #    scores in the range [0,1]. A temperature larger than 1 decreases the model's confidence scores.
    logger.info(f"Parameter used for temperature scaling of model confidence scores: "
                f"{model.prediction_heads[0].temperature_for_confidence}")

    # 6a. We can either manually set the temperature (default value is 1.0)...
    model.prediction_heads[0].temperature_for_confidence = torch.nn.Parameter(
        (torch.ones(1) * 1.0).to(device=device))

    # 6b. ...or we can run the evaluator on the dev set and use it to calibrate confidence scores
    #     with a technique called temperature scaling. It will align the confidence scores with the
    #     model's accuracy based on the dev set data by tuning the temperature parameter. During the
    #     calibration, this parameter is automatically set internally as an attribute of the prediction head.
    evaluator_dev = Evaluator(
        data_loader=data_silo.get_data_loader("dev"),
        tasks=data_silo.processor.tasks,
        device=device
    )
    result_dev = evaluator_dev.eval(model, return_preds_and_labels=True, calibrate_conf_scores=True)
    # evaluator_dev.log_results(result_dev, "Dev", logging=False, steps=len(data_silo.get_data_loader("dev")))

    # 7. Optionally, run the evaluator on the test set to see how well the confidence scores are
    #    aligned with the model's accuracy
    evaluator_test = Evaluator(
        data_loader=data_silo.get_data_loader("test"),
        tasks=data_silo.processor.tasks,
        device=device
    )
    result_test = evaluator_test.eval(model, return_preds_and_labels=True)[0]
    logger.info("Grouping predictions by confidence score and calculating metrics for each bin.")
    em_per_bin, confidence_per_bin, count_per_bin = metrics_per_bin(
        result_test["preds"], result_test["labels"], num_bins=10)
    for bin_number in range(10):
        logger.info(f"Bin {bin_number} - exact match: {em_per_bin[bin_number]}, "
                    f"average confidence score: {confidence_per_bin[bin_number]}")

    # 8. Hooray! You have a model with calibrated confidence scores. Store the model and the
    #    temperature parameter will be stored automatically as an attribute of the prediction head.
    save_dir = Path("../saved_models/qa-confidence-tutorial")
    model.save(save_dir)
    processor.save(save_dir)

    # 9. When making a prediction with the calibrated model, we could filter out predictions where
    #    the model is not confident enough. To this end, load the stored model, which will
    #    automatically load the stored temperature parameter. The confidence scores are automatically
    #    adjusted based on this temperature parameter. For each prediction, we can check the model's
    #    confidence and decide whether to output the prediction or not.
    inferencer = QAInferencer.load(save_dir, batch_size=40, gpu=True)
    logger.info(f"Loaded model with stored temperature: "
                f"{inferencer.model.prediction_heads[0].temperature_for_confidence}")

    QA_input = [{
        "questions": ["Who counted the game among the best ever made?"],
        "text": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created."
    }]
    result = inferencer.inference_from_dicts(dicts=QA_input, return_json=False)[0]
    if result.prediction[0].confidence > 0.9:
        print(result.prediction[0].answer)
    else:
        print("The confidence is not high enough to give an answer.")
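# Hedged follow-up sketch (not in the original): applying the same confidence
# threshold across a batch of predictions instead of a single one. Builds on the
# result objects used above (prediction[0].confidence / .answer).
results = inferencer.inference_from_dicts(dicts=QA_input, return_json=False)
confident_answers = [
    res.prediction[0].answer
    for res in results
    if res.prediction and res.prediction[0].confidence > 0.9
]
print(confident_answers)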
def test_dataset_from_dicts_qa_inference(caplog=None):
    if caplog:
        caplog.set_level(logging.CRITICAL)

    models = [
        "deepset/roberta-base-squad2",
        "deepset/bert-base-cased-squad2",
        "deepset/xlm-roberta-large-squad2",
        "deepset/minilm-uncased-squad2",
        "deepset/electra-base-squad2",
    ]
    sample_types = ["answer-wrong", "answer-offset-wrong", "noanswer", "vanilla"]

    for model in models:
        tokenizer = Tokenizer.load(pretrained_model_name_or_path=model, use_fast=True)
        processor = SquadProcessor(tokenizer, max_seq_len=256, data_dir=None)

        for sample_type in sample_types:
            dicts = processor.file_to_dicts(f"samples/qa/{sample_type}.json")
            dataset, tensor_names, problematic_sample_ids, baskets = processor.dataset_from_dicts(
                dicts, indices=[1], return_baskets=True)
            assert tensor_names == [
                'input_ids', 'padding_mask', 'segment_ids', 'passage_start_t',
                'start_of_word', 'labels', 'id', 'seq_2_start_t', 'span_mask'
            ], f"Processing for {model} has changed."
            assert len(problematic_sample_ids) == 0, f"Processing for {model} has changed."
            assert baskets[0].id_external == '5ad3d560604f3c001a3ff2c8', \
                f"Processing for {model} has changed."
            assert baskets[0].id_internal == '1-0', \
                f"Processing for {model} has changed."

            # roberta
            if model == "deepset/roberta-base-squad2":
                assert len(baskets[0].samples[0].tokenized["passage_tokens"]) == 6, \
                    f"Processing for {model} has changed."
                assert len(baskets[0].samples[0].tokenized["question_tokens"]) == 7, \
                    f"Processing for {model} has changed."
                if sample_type == "noanswer":
                    assert baskets[0].samples[0].features[0]["input_ids"][:13] == \
                        [0, 6179, 171, 82, 697, 11, 2201, 116, 2, 2, 26795, 2614, 34], \
                        f"Processing for {model} and {sample_type}-testsample has changed."
                else:
                    assert baskets[0].samples[0].features[0]["input_ids"][:13] == \
                        [0, 6179, 171, 82, 697, 11, 5459, 116, 2, 2, 26795, 2614, 34], \
                        f"Processing for {model} and {sample_type}-testsample has changed."

            # bert
            if model == "deepset/bert-base-cased-squad2":
                assert len(baskets[0].samples[0].tokenized["passage_tokens"]) == 5, \
                    f"Processing for {model} has changed."
                assert len(baskets[0].samples[0].tokenized["question_tokens"]) == 7, \
                    f"Processing for {model} has changed."
                if sample_type == "noanswer":
                    assert baskets[0].samples[0].features[0]["input_ids"][:10] == \
                        [101, 1731, 1242, 1234, 1686, 1107, 2123, 136, 102, 3206], \
                        f"Processing for {model} and {sample_type}-testsample has changed."
                else:
                    assert baskets[0].samples[0].features[0]["input_ids"][:10] == \
                        [101, 1731, 1242, 1234, 1686, 1107, 3206, 136, 102, 3206], \
                        f"Processing for {model} and {sample_type}-testsample has changed."

            # xlm-roberta
            if model == "deepset/xlm-roberta-large-squad2":
                assert len(baskets[0].samples[0].tokenized["passage_tokens"]) == 7, \
                    f"Processing for {model} has changed."
                assert len(baskets[0].samples[0].tokenized["question_tokens"]) == 7, \
                    f"Processing for {model} has changed."
                if sample_type == "noanswer":
                    assert baskets[0].samples[0].features[0]["input_ids"][:12] == \
                        [0, 11249, 5941, 3395, 6867, 23, 7270, 32, 2, 2, 10271, 1556], \
                        f"Processing for {model} and {sample_type}-testsample has changed."
                else:
                    assert baskets[0].samples[0].features[0]["input_ids"][:12] == \
                        [0, 11249, 5941, 3395, 6867, 23, 10271, 32, 2, 2, 10271, 1556], \
                        f"Processing for {model} and {sample_type}-testsample has changed."

            # minilm and electra have same vocab + tokenizer
            if model == "deepset/minilm-uncased-squad2" or model == "deepset/electra-base-squad2":
                assert len(baskets[0].samples[0].tokenized["passage_tokens"]) == 5, \
                    f"Processing for {model} has changed."
                assert len(baskets[0].samples[0].tokenized["question_tokens"]) == 7, \
                    f"Processing for {model} has changed."
                if sample_type == "noanswer":
                    assert baskets[0].samples[0].features[0]["input_ids"][:10] == \
                        [101, 2129, 2116, 2111, 2444, 1999, 3000, 1029, 102, 4068], \
                        f"Processing for {model} and {sample_type}-testsample has changed."
                else:
                    assert baskets[0].samples[0].features[0]["input_ids"][:10] == \
                        [101, 2129, 2116, 2111, 2444, 1999, 4068, 1029, 102, 4068], \
                        f"Processing for {model} and {sample_type}-testsample has changed."
def test_qa(caplog):
    caplog.set_level(logging.CRITICAL)
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=False)
    batch_size = 2
    n_epochs = 1
    evaluate_every = 4
    base_LM_model = "bert-base-cased"

    tokenizer = BertTokenizer.from_pretrained(
        pretrained_model_name_or_path=base_LM_model, do_lower_case=False)
    label_list = ["start_token", "end_token"]
    processor = SquadProcessor(tokenizer=tokenizer,
                               max_seq_len=16,
                               max_query_length=4,
                               train_filename="train-sample.json",
                               dev_filename="dev-sample.json",
                               test_filename=None,
                               data_dir="samples/qa",
                               labels=label_list,
                               metric="squad")

    data_silo = DataSilo(processor=processor, batch_size=batch_size)
    language_model = Bert.load(base_LM_model)
    prediction_head = QuestionAnsweringHead(layer_dims=[768, len(label_list)])
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    optimizer, warmup_linear = initialize_optimizer(
        model=model,
        learning_rate=1e-5,
        warmup_proportion=0.2,
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
    )
    trainer = Trainer(
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        warmup_linear=warmup_linear,
        evaluate_every=evaluate_every,
        device=device,
    )
    model = trainer.train(model)
    save_dir = "testsave/qa"
    model.save(save_dir)
    processor.save(save_dir)

    QA_input = [{
        "questions": ["In what country is Normandy located?"],
        "text": 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.',
    }]

    model = Inferencer.load(save_dir)
    result = model.run_inference(dicts=QA_input)
    assert isinstance(result[0]["predictions"][0]["end"], int)
def question_answering():
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )
    ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
    ml_logger.init_experiment(experiment_name="Public_FARM",
                              run_name="Run_question_answering")

    ##########################
    ########## Settings
    ##########################
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    batch_size = 24
    n_epochs = 2
    evaluate_every = 2000
    lang_model = "roberta-base"
    do_lower_case = False  # roberta is a cased model
    train_filename = "train-v2.0.json"
    dev_filename = "dev-v2.0.json"

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    label_list = ["start_token", "end_token"]
    metric = "squad"
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=384,
        label_list=label_list,
        metric=metric,
        train_filename=train_filename,
        dev_filename=dev_filename,
        test_filename=None,
        data_dir=Path("../data/squad20"),
    )

    # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    # and calculates a few descriptive statistics of our datasets
    # NOTE: In FARM, the dev set metrics differ from test set metrics in that they are calculated on a token level instead of a word level
    data_silo = DataSilo(processor=processor, batch_size=batch_size, distributed=False)

    # 4. Create an AdaptiveModel
    # a) which consists of a pretrained language model as a basis
    language_model = LanguageModel.load(lang_model)
    # b) and a prediction head on top that is suited for our task => Question Answering
    prediction_head = QuestionAnsweringHead()

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    # 5. Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=3e-5,
        schedule_opts={"name": "LinearWarmup", "warmup_proportion": 0.2},
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device)

    # 6. Feed everything to the Trainer, which takes care of growing our model and evaluates it from time to time
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
    )

    # 7. Let it grow! Watch the tracked metrics live on the public mlflow server: https://public-mlflow.deepset.ai
    trainer.train()

    # 8. Hooray! You have a model. Store it:
    save_dir = Path("../saved_models/bert-english-qa-tutorial")
    model.save(save_dir)
    processor.save(save_dir)

    # 9. Load it & harvest your fruits (Inference)
    QA_input = [{
        "qas": ["Who counted the game among the best ever made?"],
        "context": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created."
    }]

    model = QAInferencer.load(save_dir, batch_size=40, gpu=True)
    result = model.inference_from_dicts(dicts=QA_input)[0]
    pprint.pprint(result)

    # 10. Do Inference on whole SQuAD Dataset & write the predictions file to disk
    filename = os.path.join(processor.data_dir, processor.dev_filename)
    result = model.inference_from_file(file=filename, return_json=False)
    result_squad = [x.to_squad_eval() for x in result]

    write_squad_predictions(predictions=result_squad,
                            predictions_filename=filename,
                            out_filename="predictions.json")
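# The SquadProcessor above expects its train/dev files in SQuAD-style JSON.
# A minimal sketch of that structure (file name and values are made up; this
# shows the common SQuAD v2.0 fields, including an unanswerable question):
import json

squad_style_data = {
    "version": "v2.0",
    "data": [{
        "title": "Normans",
        "paragraphs": [{
            "context": "The Normans gave their name to Normandy, a region in France.",
            "qas": [
                {"id": "q1",
                 "question": "In what country is Normandy located?",
                 "answers": [{"text": "France", "answer_start": 53}],  # char offset of "France"
                 "is_impossible": False},
                {"id": "q2",
                 "question": "Who invented the telephone?",
                 "answers": [],
                 "is_impossible": True},
            ],
        }],
    }],
}

with open("my-train-sample.json", "w") as f:
    json.dump(squad_style_data, f)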
def train(self,
          data_dir,
          train_filename,
          dev_filename=None,
          test_file_name=None,
          use_gpu=None,
          batch_size=10,
          n_epochs=2,
          learning_rate=1e-5,
          max_seq_len=None,
          warmup_proportion=0.2,
          dev_split=0.1,
          evaluate_every=300,
          save_dir=None):
    """
    Fine-tune a model on a QA dataset. Options:

    - Take a plain language model (e.g. `bert-base-cased`) and train it for QA (e.g. on SQuAD data)
    - Take a QA model (e.g. `deepset/bert-base-cased-squad2`) and fine-tune it for your domain (e.g. using your labels collected via the haystack annotation tool)

    :param data_dir: Path to directory containing your training data in SQuAD style
    :param train_filename: filename of training data
    :param dev_filename: filename of dev / eval data
    :param test_file_name: filename of test data
    :param dev_split: Instead of specifying a dev_filename you can also specify a ratio (e.g. 0.1) here
                      that gets split off from training data for eval.
    :param use_gpu: Whether to use GPU (if available)
    :param batch_size: Number of samples the model receives in one batch for training
    :param n_epochs: number of iterations on the whole training data set
    :param learning_rate: learning rate of the optimizer
    :param max_seq_len: maximum text length (in tokens). Everything longer gets cut down.
    :param warmup_proportion: Proportion of training steps until maximum learning rate is reached.
                              Until that point the LR is increasing linearly. After that it's decreasing again linearly.
                              Options for different schedules are available in FARM.
    :param evaluate_every: Evaluate the model every X steps on the hold-out eval dataset
    :param save_dir: Path to store the final model
    :return: None
    """
    if dev_filename:
        dev_split = None

    set_all_seeds(seed=42)

    # For these variables, by default, we use the value set when initializing the FARMReader.
    # These can also be manually set when train() is called if you want a different value at train vs inference
    if use_gpu is None:
        use_gpu = self.use_gpu
    if max_seq_len is None:
        max_seq_len = self.max_seq_len

    device, n_gpu = initialize_device_settings(use_cuda=use_gpu)

    if not save_dir:
        save_dir = f"../../saved_models/{self.inferencer.model.language_model.name}"
    save_dir = Path(save_dir)

    # 1. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    label_list = ["start_token", "end_token"]
    metric = "squad"
    processor = SquadProcessor(
        tokenizer=self.inferencer.processor.tokenizer,
        max_seq_len=max_seq_len,
        label_list=label_list,
        metric=metric,
        train_filename=train_filename,
        dev_filename=dev_filename,
        dev_split=dev_split,
        test_filename=test_file_name,
        data_dir=Path(data_dir),
    )

    # 2. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    # and calculates a few descriptive statistics of our datasets
    data_silo = DataSilo(processor=processor, batch_size=batch_size, distributed=False)

    # 3. Create an optimizer and pass the already initialized model
    model, optimizer, lr_schedule = initialize_optimizer(
        model=self.inferencer.model,
        learning_rate=learning_rate,
        schedule_opts={"name": "LinearWarmup", "warmup_proportion": warmup_proportion},
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device)

    # 4. Feed everything to the Trainer, which takes care of growing our model and evaluates it from time to time
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
    )

    # 5. Let it grow!
    self.inferencer.model = trainer.train()
    self.save(save_dir)
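# A minimal usage sketch for the train() method above, assuming it lives on
# haystack's FARMReader (the import path and defaults may differ between
# haystack versions; "my_squad_style_data" and the file names are made up):
from haystack.reader.farm import FARMReader

reader = FARMReader(model_name_or_path="deepset/bert-base-cased-squad2", use_gpu=True)
reader.train(
    data_dir="my_squad_style_data",  # directory with SQuAD-style JSON
    train_filename="answers.json",   # your annotated training file
    dev_split=0.1,                   # hold out 10% of training data for eval
    n_epochs=2,
    save_dir="saved_models/my_domain_reader",
)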
def test_evaluation():
    ##########################
    ########## Settings
    ##########################
    lang_model = "deepset/roberta-base-squad2"
    do_lower_case = False
    test_assertions = False

    data_dir = Path("testsave/data/squad20")
    evaluation_filename = "dev-v2.0.json"

    device, n_gpu = initialize_device_settings(use_cuda=True)

    # loading models and evals
    model = AdaptiveModel.convert_from_transformers(
        lang_model, device=device, task_type="question_answering")
    model.prediction_heads[0].no_ans_boost = 0
    model.prediction_heads[0].n_best = 1
    model.prediction_heads[0].n_best_per_sample = 1

    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=do_lower_case)
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=256,
        label_list=["start_token", "end_token"],
        metric="squad",
        train_filename=None,
        dev_filename=None,
        dev_split=0,
        test_filename=evaluation_filename,
        data_dir=data_dir,
        doc_stride=128,
    )

    starttime = time()

    # NOTE: `n_gpu_factor` and `error_messages` are assumed to be module-level
    # globals of the surrounding benchmark script (a GPU-count multiplier and a
    # shared list collecting tolerance violations).
    data_silo = DataSilo(processor=processor, batch_size=40 * n_gpu_factor)
    model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)
    model, _ = optimize_model(model=model,
                              device=device,
                              local_rank=-1,
                              optimizer=None,
                              distributed=False,
                              use_amp=None)
    evaluator = Evaluator(data_loader=data_silo.get_data_loader("test"),
                          tasks=data_silo.processor.tasks,
                          device=device)

    # 1. Test FARM internal evaluation
    results = evaluator.eval(model)
    f1_score = results[0]["f1"] * 100
    em_score = results[0]["EM"] * 100
    tnacc = results[0]["top_n_accuracy"] * 100
    elapsed = time() - starttime
    print(results)
    print(elapsed)

    gold_EM = 78.4721
    gold_f1 = 82.6671
    gold_tnacc = 84.3594  # top 1 accuracy
    gold_elapsed = 40  # 4x V100
    if test_assertions:
        np.testing.assert_allclose(
            em_score,
            gold_EM,
            rtol=0.001,
            err_msg=f"FARM Eval changed for EM by: {em_score-gold_EM}")
        np.testing.assert_allclose(
            f1_score,
            gold_f1,
            rtol=0.001,
            err_msg=f"FARM Eval changed for f1 score by: {f1_score-gold_f1}")
        np.testing.assert_allclose(
            tnacc,
            gold_tnacc,
            rtol=0.001,
            err_msg=f"FARM Eval changed for top 1 accuracy by: {tnacc-gold_tnacc}")
        np.testing.assert_allclose(
            elapsed,
            gold_elapsed,
            rtol=0.1,
            err_msg=f"FARM Eval speed changed significantly by: {elapsed - gold_elapsed} seconds")
    if not np.allclose(f1_score, gold_f1, rtol=0.001):
        error_messages.append(
            f"FARM Eval changed for f1 score by: {round(f1_score - gold_f1, 4)}")
    if not np.allclose(em_score, gold_EM, rtol=0.001):
        error_messages.append(
            f"FARM Eval changed for EM by: {round(em_score - gold_EM, 4)}")
    if not np.allclose(tnacc, gold_tnacc, rtol=0.001):
        error_messages.append(
            f"FARM Eval changed for top 1 accuracy by: {round(tnacc-gold_tnacc, 4)}")
    if not np.allclose(elapsed, gold_elapsed, rtol=0.1):
        error_messages.append(
            f"FARM Eval speed changed significantly by: {round(elapsed - gold_elapsed, 4)} seconds")

    benchmark_result = [{
        "run": "FARM internal evaluation",
        "f1_change": round(f1_score - gold_f1, 4),
        "em_change": round(em_score - gold_EM, 4),
        "tnacc_change": round(tnacc - gold_tnacc, 4),
        "elapsed_change": round(elapsed - gold_elapsed, 4),
        "f1": f1_score,
        "em": em_score,
        "tnacc": round(tnacc, 4),
        "elapsed": elapsed,
        "f1_gold": gold_f1,
        "em_gold": gold_EM,
        "tnacc_gold": gold_tnacc,
        "elapsed_gold": gold_elapsed
    }]
    logger.info("\n\n" + pformat(benchmark_result[0]) + "\n")

    # 2. Test FARM predictions with outside eval script
    starttime = time()
    model = Inferencer(model=model,
                       processor=processor,
                       task_type="question_answering",
                       batch_size=40 * n_gpu_factor,
                       gpu=device.type == "cuda")
    filename = data_dir / evaluation_filename
    result = model.inference_from_file(file=filename,
                                       return_json=False,
                                       multiprocessing_chunksize=80)
    results_squad = [x.to_squad_eval() for x in result]
    model.close_multiprocessing_pool()

    elapsed = time() - starttime

    os.makedirs("../testsave", exist_ok=True)
    write_squad_predictions(predictions=results_squad,
                            predictions_filename=filename,
                            out_filename="testsave/predictions.json")
    script_params = {
        "data_file": filename,
        "pred_file": "testsave/predictions.json",
        "na_prob_thresh": 1,
        "na_prob_file": False,
        "out_file": False
    }
    results_official = squad_evaluation.main(OPTS=DotMap(script_params))
    f1_score = results_official["f1"]
    em_score = results_official["exact"]

    gold_EM = 79.878
    gold_f1 = 82.917
    gold_elapsed = 27  # 4x V100
    print(elapsed)
    if test_assertions:
        np.testing.assert_allclose(
            em_score,
            gold_EM,
            rtol=0.001,
            err_msg=f"Eval with official script changed for EM by: {em_score - gold_EM}")
        np.testing.assert_allclose(
            f1_score,
            gold_f1,
            rtol=0.001,
            err_msg=f"Eval with official script changed for f1 score by: {f1_score - gold_f1}")
        np.testing.assert_allclose(
            elapsed,
            gold_elapsed,
            rtol=0.1,
            err_msg=f"Inference speed changed significantly by: {elapsed - gold_elapsed} seconds")
    if not np.allclose(f1_score, gold_f1, rtol=0.001):
        error_messages.append(
            f"Eval with official script changed for f1 score by: {round(f1_score - gold_f1, 4)}")
    if not np.allclose(em_score, gold_EM, rtol=0.001):
        error_messages.append(
            f"Eval with official script changed for EM by: {round(em_score - gold_EM, 4)}")
    if not np.allclose(elapsed, gold_elapsed, rtol=0.1):
        error_messages.append(
            f"Inference speed changed significantly by: {round(elapsed - gold_elapsed,4)} seconds")

    benchmark_result.append({
        "run": "outside eval script",
        "f1_change": round(f1_score - gold_f1, 4),
        "em_change": round(em_score - gold_EM, 4),
        "tnacc_change": "-",
        "elapsed_change": round(elapsed - gold_elapsed, 4),
        "f1": f1_score,
        "em": em_score,
        "tnacc": "-",
        "elapsed": elapsed,
        "f1_gold": gold_f1,
        "em_gold": gold_EM,
        "tnacc_gold": "-",
        "elapsed_gold": gold_elapsed
    })
    logger.info("\n\n" + pformat(benchmark_result[1]) + "\n")
    return benchmark_result
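# test_evaluation() above returns `benchmark_result` as a list of flat dicts,
# one per run. A small sketch (pandas assumed available; the output file name
# is made up) for collecting those rows into a readable report:
import pandas as pd

rows = test_evaluation()
df = pd.DataFrame(rows)
# columns ending in "_change" show the drift against the pinned gold values
print(df[["run", "f1", "em", "f1_change", "em_change", "elapsed_change"]])
df.to_csv("qa_benchmark_report.csv", index=False)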
def question_answering_crossvalidation():
    ##########################
    ########## Logging
    ##########################
    logger = logging.getLogger(__name__)
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO)
    # reduce verbosity from transformers library
    logging.getLogger('transformers').setLevel(logging.WARNING)

    #ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
    # for local logging instead:
    ml_logger = MLFlowLogger(tracking_uri="logs")
    #ml_logger.init_experiment(experiment_name="QA_X-Validation", run_name="Squad_Roberta_Base")

    ##########################
    ########## Settings
    ##########################
    save_per_fold_results = False  # unsupported for now

    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=True)

    lang_model = "deepset/roberta-base-squad2"
    do_lower_case = False

    n_epochs = 2
    batch_size = 80
    learning_rate = 3e-5

    data_dir = Path("../data/covidqa")
    filename = "COVID-QA.json"
    xval_folds = 5
    dev_split = 0
    evaluate_every = 0
    no_ans_boost = -100  # use large negative values to disable giving "no answer" option
    accuracy_at = 3  # accuracy at n is useful for answers inside long documents
    use_amp = None

    ##########################
    ########## k fold Cross validation
    ##########################

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=384,
        label_list=["start_token", "end_token"],
        metric="squad",
        train_filename=filename,
        dev_filename=None,
        dev_split=dev_split,
        test_filename=None,
        data_dir=data_dir,
        doc_stride=192,
    )

    # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    # and calculates a few descriptive statistics of our datasets
    data_silo = DataSilo(processor=processor, batch_size=batch_size)

    # Load one silo for each fold in our cross-validation
    silos = DataSiloForCrossVal.make(data_silo, n_splits=xval_folds)

    # the following steps should be run for each of the folds of the cross validation, so we put them
    # into a function
    def train_on_split(silo_to_use, n_fold):
        logger.info(f"############ Crossvalidation: Fold {n_fold} ############")

        # fine-tune pre-trained question-answering model
        model = AdaptiveModel.convert_from_transformers(
            lang_model, device=device, task_type="question_answering")
        model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)
        # If positive, this will boost "No Answer" as prediction.
        # If negative, this will prevent the model from giving "No Answer" as prediction.
        model.prediction_heads[0].no_ans_boost = no_ans_boost
        # Number of predictions the model will make per Question.
        # The multiple predictions are used for evaluating top n recall.
        model.prediction_heads[0].n_best = accuracy_at

        # # or train question-answering models from scratch
        # # Create an AdaptiveModel
        # # a) which consists of a pretrained language model as a basis
        # language_model = LanguageModel.load(lang_model)
        # # b) and a prediction head on top that is suited for our task => Question-answering
        # prediction_head = QuestionAnsweringHead(no_ans_boost=no_ans_boost, n_best=accuracy_at)
        # model = AdaptiveModel(
        #     language_model=language_model,
        #     prediction_heads=[prediction_head],
        #     embeds_dropout_prob=0.1,
        #     lm_output_types=["per_token"],
        #     device=device,)

        # Create an optimizer
        model, optimizer, lr_schedule = initialize_optimizer(
            model=model,
            learning_rate=learning_rate,
            device=device,
            n_batches=len(silo_to_use.loaders["train"]),
            n_epochs=n_epochs,
            use_amp=use_amp)

        # Feed everything to the Trainer, which takes care of growing our model into a powerful plant and evaluates it from time to time
        # Also create an EarlyStopping instance and pass it on to the trainer
        trainer = Trainer(model=model,
                          optimizer=optimizer,
                          data_silo=silo_to_use,
                          epochs=n_epochs,
                          n_gpu=n_gpu,
                          lr_schedule=lr_schedule,
                          evaluate_every=evaluate_every,
                          device=device,
                          evaluator_test=False)

        # train it
        trainer.train()
        return trainer.model

    # for each fold, run the whole training, then evaluate the model on the test set of each fold
    # Remember all the results for overall metrics over all predictions of all folds and for averaging
    all_results = []
    all_preds = []
    all_labels = []
    all_f1 = []
    all_em = []
    all_topnaccuracy = []

    for num_fold, silo in enumerate(silos):
        model = train_on_split(silo, num_fold)

        # do eval on test set here (and not in Trainer),
        # so that we can easily store the actual preds and labels for a "global" eval across all folds.
        evaluator_test = Evaluator(data_loader=silo.get_data_loader("test"),
                                   tasks=silo.processor.tasks,
                                   device=device)
        result = evaluator_test.eval(model, return_preds_and_labels=True)
        evaluator_test.log_results(result,
                                   "Test",
                                   logging=False,
                                   steps=len(silo.get_data_loader("test")),
                                   num_fold=num_fold)

        all_results.append(result)
        all_preds.extend(result[0].get("preds"))
        all_labels.extend(result[0].get("labels"))
        all_f1.append(result[0]["f1"])
        all_em.append(result[0]["EM"])
        all_topnaccuracy.append(result[0]["top_n_accuracy"])

        # empty cache to avoid memory leak and cuda OOM across multiple folds
        model.cpu()
        torch.cuda.empty_cache()

    # Save the per-fold results to json for a separate, more detailed analysis
    # TODO currently not supported - adjust to QAPred and QACandidate objects
    # if save_per_fold_results:
    #     def convert_numpy_dtype(obj):
    #         if type(obj).__module__ == "numpy":
    #             return obj.item()
    #
    #         raise TypeError("Unknown type:", type(obj))
    #
    #     with open("qa_xval.results.json", "wt") as fp:
    #         json.dump(all_results, fp, default=convert_numpy_dtype)

    # calculate overall metrics across all folds
    xval_score = squad(preds=all_preds, labels=all_labels)

    logger.info(f"Single EM-Scores: {all_em}")
    logger.info(f"Single F1-Scores: {all_f1}")
    logger.info(f"Single top_{accuracy_at}_accuracy Scores: {all_topnaccuracy}")
    logger.info(f"XVAL EM: {xval_score['EM']}")
    logger.info(f"XVAL f1: {xval_score['f1']}")
    logger.info(f"XVAL top_{accuracy_at}_accuracy: {xval_score['top_n_accuracy']}")
    ml_logger.log_metrics({"XVAL EM": xval_score["EM"]}, 0)
    ml_logger.log_metrics({"XVAL f1": xval_score["f1"]}, 0)
    ml_logger.log_metrics({f"XVAL top_{accuracy_at}_accuracy": xval_score["top_n_accuracy"]}, 0)
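# Besides the "global" score over all pooled predictions computed above, the
# per-fold lists (all_f1, all_em, all_topnaccuracy) can be summarized with a
# mean and standard deviation to gauge fold-to-fold variance. A minimal sketch
# using only the standard library (the example scores are made up):
from statistics import mean, stdev

def summarize_folds(scores):
    # stdev needs at least two folds; guard the single-fold edge case
    spread = stdev(scores) if len(scores) > 1 else 0.0
    return mean(scores), spread

f1_mean, f1_std = summarize_folds([0.81, 0.79, 0.83, 0.80, 0.82])  # e.g. all_f1
print(f"F1 across folds: {f1_mean:.4f} +/- {f1_std:.4f}")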
def test_qa(caplog=None):
    if caplog:
        caplog.set_level(logging.CRITICAL)

    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=False)
    batch_size = 2
    n_epochs = 1
    evaluate_every = 4
    base_LM_model = "distilbert-base-uncased"

    tokenizer = Tokenizer.load(pretrained_model_name_or_path=base_LM_model,
                               do_lower_case=True)
    label_list = ["start_token", "end_token"]
    processor = SquadProcessor(tokenizer=tokenizer,
                               max_seq_len=20,
                               doc_stride=10,
                               max_query_length=6,
                               train_filename="train-sample.json",
                               dev_filename="dev-sample.json",
                               test_filename=None,
                               data_dir=Path("samples/qa"),
                               label_list=label_list,
                               metric="squad")
    data_silo = DataSilo(processor=processor, batch_size=batch_size, max_processes=1)
    language_model = LanguageModel.load(base_LM_model)
    prediction_head = QuestionAnsweringHead()
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=2e-5,
        #optimizer_opts={'name': 'AdamW', 'lr': 2E-05},
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device)
    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      data_silo=data_silo,
                      epochs=n_epochs,
                      n_gpu=n_gpu,
                      lr_schedule=lr_schedule,
                      evaluate_every=evaluate_every,
                      device=device)
    trainer.train()
    save_dir = Path("testsave/qa")
    model.save(save_dir)
    processor.save(save_dir)

    inferencer = Inferencer.load(save_dir, batch_size=2, gpu=False, num_processes=0)

    qa_format_1 = [{
        "questions": ["Who counted the game among the best ever made?"],
        "text": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created."
    }]
    qa_format_2 = [{
        "qas": ["Who counted the game among the best ever made?"],
        "context": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created.",
    }]

    result1 = inferencer.inference_from_dicts(dicts=qa_format_1)
    result2 = inferencer.inference_from_dicts(dicts=qa_format_2)
    assert result1 == result2
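# The two input formats accepted above differ only in their key names. A tiny
# sketch of normalizing the "questions"/"text" style into the "qas"/"context"
# style (key names taken from the test above):
def to_qas_format(dicts):
    return [{"qas": d["questions"], "context": d["text"]} for d in dicts]

assert to_qas_format([{"questions": ["Who?"], "text": "Some passage."}]) == \
       [{"qas": ["Who?"], "context": "Some passage."}]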
def test_evaluation():
    ##########################
    ########## Settings
    ##########################
    lang_model = "deepset/roberta-base-squad2"
    do_lower_case = False
    test_assertions = True

    data_dir = Path("testsave/data/squad20")
    evaluation_filename = "dev-v2.0.json"

    device, n_gpu = initialize_device_settings(use_cuda=True)

    # loading models and evals
    model = AdaptiveModel.convert_from_transformers(
        lang_model, device=device, task_type="question_answering")
    model.prediction_heads[0].no_ans_boost = 0
    model.prediction_heads[0].n_best = 1

    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=do_lower_case)
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=256,
        label_list=["start_token", "end_token"],
        metric="squad",
        train_filename=None,
        dev_filename=None,
        dev_split=0,
        test_filename=evaluation_filename,
        data_dir=data_dir,
        doc_stride=128,
    )

    starttime = time()

    data_silo = DataSilo(processor=processor, batch_size=50)
    model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)
    evaluator = Evaluator(data_loader=data_silo.get_data_loader("test"),
                          tasks=data_silo.processor.tasks,
                          device=device)

    # 1. Test FARM internal evaluation
    results = evaluator.eval(model)
    f1_score = results[0]["f1"] * 100
    em_score = results[0]["EM"] * 100
    tnrecall = results[0]["top_n_recall"] * 100
    elapsed = time() - starttime
    print(results)
    print(elapsed)

    gold_EM = 77.7478
    gold_f1 = 82.1557
    gold_tnrecall = 84.0646  # top 1 recall
    gold_elapsed = 70  # 4x V100
    if test_assertions:
        np.testing.assert_allclose(
            em_score,
            gold_EM,
            rtol=0.001,
            err_msg=f"FARM Eval changed for EM by: {em_score-gold_EM}")
        np.testing.assert_allclose(
            f1_score,
            gold_f1,
            rtol=0.001,
            err_msg=f"FARM Eval changed for f1 score by: {f1_score-gold_f1}")
        np.testing.assert_allclose(
            tnrecall,
            gold_tnrecall,
            rtol=0.001,
            err_msg=f"FARM Eval changed for top 1 recall by: {tnrecall-gold_tnrecall}")
        np.testing.assert_allclose(
            elapsed,
            gold_elapsed,
            rtol=0.1,
            err_msg=f"FARM Eval speed changed significantly by: {elapsed - gold_elapsed} seconds")

    # 2. Test FARM predictions with outside eval script
    starttime = time()
    model = Inferencer(model=model,
                       processor=processor,
                       task_type="question_answering",
                       batch_size=50,
                       gpu=device.type == "cuda")
    filename = data_dir / evaluation_filename
    result = model.inference_from_file(file=filename)

    elapsed = time() - starttime

    os.makedirs("../testsave", exist_ok=True)
    write_squad_predictions(predictions=result,
                            predictions_filename=filename,
                            out_filename="testsave/predictions.json")
    script_params = {
        "data_file": filename,
        "pred_file": "testsave/predictions.json",
        "na_prob_thresh": 1,
        "na_prob_file": False,
        "out_file": False
    }
    results_official = squad_evaluation.main(OPTS=DotMap(script_params))
    f1_score = results_official["f1"]
    em_score = results_official["exact"]

    gold_EM = 78.4890
    gold_f1 = 81.7104
    gold_elapsed = 66  # 4x V100
    print(elapsed)
    if test_assertions:
        np.testing.assert_allclose(
            em_score,
            gold_EM,
            rtol=0.001,
            err_msg=f"Eval with official script changed for EM by: {em_score - gold_EM}")
        np.testing.assert_allclose(
            f1_score,
            gold_f1,
            rtol=0.001,
            err_msg=f"Eval with official script changed for f1 score by: {f1_score - gold_f1}")
        np.testing.assert_allclose(
            elapsed,
            gold_elapsed,
            rtol=0.1,
            err_msg=f"Inference speed changed significantly by: {elapsed - gold_elapsed} seconds")
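# The assertions above use a relative tolerance (rtol), not an absolute one:
# np.testing.assert_allclose(actual, gold, rtol=0.001) passes when
# |actual - gold| <= rtol * |gold|. A quick worked check against one of the
# gold values used above:
import numpy as np

gold = 82.1557
np.testing.assert_allclose(82.10, gold, rtol=0.001)  # |diff| = 0.0557 <= 0.0822 -> passes
try:
    np.testing.assert_allclose(81.90, gold, rtol=0.001)  # |diff| = 0.2557 > 0.0822 -> raises
except AssertionError:
    print("outside the 0.1% band")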
@classmethod
def load(
    cls,
    model_name_or_path,
    batch_size=4,
    gpu=False,
    task_type=None,
    return_class_probs=False,
    strict=True,
    max_seq_len=256,
    doc_stride=128,
    extraction_layer=None,
    extraction_strategy=None,
    s3e_stats=None,
    num_processes=None,
    disable_tqdm=False,
    tokenizer_class=None,
    use_fast=False,
    tokenizer_args=None,
    dummy_ph=False,
    benchmarking=False,
):
    """
    Load an Inferencer incl. all relevant components (model, tokenizer, processor ...) either by

    1. specifying a public name from transformers' model hub (https://huggingface.co/models)
    2. or pointing to a local directory it is saved in.

    :param model_name_or_path: Local directory or public name of the model to load.
    :type model_name_or_path: str
    :param batch_size: Number of samples computed once per batch
    :type batch_size: int
    :param gpu: If GPU shall be used
    :type gpu: bool
    :param task_type: Type of task the model should be used for. Currently supporting:
                      "embeddings", "question_answering", "text_classification", "ner". More coming soon...
    :type task_type: str
    :param strict: whether to strictly enforce that the keys loaded from saved model match the ones in
                   the PredictionHead (see torch.nn.module.load_state_dict()).
                   Set to `False` for backwards compatibility with PHs saved with older version of FARM.
    :type strict: bool
    :param max_seq_len: maximum length of one text sample
    :type max_seq_len: int
    :param doc_stride: Only QA: When input text is longer than max_seq_len it gets split into parts, strided by doc_stride
    :type doc_stride: int
    :param extraction_strategy: Strategy to extract vectors. Choices: 'cls_token' (sentence vector), 'reduce_mean'
                                (sentence vector), reduce_max (sentence vector), 'per_token' (individual token vectors)
    :type extraction_strategy: str
    :param extraction_layer: number of layer from which the embeddings shall be extracted. Default: -1 (very last layer).
    :type extraction_layer: int
    :param s3e_stats: Stats of a fitted S3E model as returned by `fit_s3e_on_corpus()`
                      (only needed for task_type="embeddings" and extraction_strategy = "s3e")
    :type s3e_stats: dict
    :param num_processes: the number of processes for `multiprocessing.Pool`. Set to value of 0 to disable
                          multiprocessing. Set to None to let Inferencer use all CPU cores minus one. If you want to
                          debug the Language Model, you might need to disable multiprocessing!
                          **Warning!** If you use multiprocessing you have to close the `multiprocessing.Pool` again!
                          To do so call :func:`~farm.infer.Inferencer.close_multiprocessing_pool` after you are
                          done using this class. The garbage collector will not do this for you!
    :type num_processes: int
    :param disable_tqdm: Whether to disable tqdm logging (can get very verbose in multiprocessing)
    :type disable_tqdm: bool
    :param tokenizer_class: (Optional) Name of the tokenizer class to load (e.g. `BertTokenizer`)
    :type tokenizer_class: str
    :param use_fast: (Optional, False by default) Indicate if FARM should try to load the fast version of the
                     tokenizer (True) or use the Python one (False).
    :type use_fast: bool
    :param tokenizer_args: (Optional) Will be passed to the Tokenizer ``__init__`` method.
                           See https://huggingface.co/transformers/main_classes/tokenizer.html and detailed tokenizer
                           documentation on `Hugging Face Transformers <https://huggingface.co/transformers/>`_.
    :type tokenizer_args: dict
    :param dummy_ph: If True, methods of the prediction head will be replaced with a dummy method.
                     This is used to isolate lm run time from ph run time.
    :type dummy_ph: bool
    :param benchmarking: If True, a benchmarking object will be initialised within the class and certain parts of
                         the code will be timed for benchmarking. Should be kept False if not benchmarking since these
                         timing checkpoints require synchronization of the asynchronous Pytorch operations and
                         may slow down the model.
    :type benchmarking: bool
    :return: An instance of the Inferencer.
    """
    if tokenizer_args is None:
        tokenizer_args = {}

    device, n_gpu = initialize_device_settings(use_cuda=gpu, local_rank=-1, use_amp=None)
    name = os.path.basename(model_name_or_path)

    # a) either from local dir
    if os.path.exists(model_name_or_path):
        model = BaseAdaptiveModel.load(load_dir=model_name_or_path, device=device, strict=strict)
        if task_type == "embeddings":
            processor = InferenceProcessor.load_from_dir(model_name_or_path)
        else:
            processor = Processor.load_from_dir(model_name_or_path)

        # override processor attributes loaded from config file with inferencer params
        processor.max_seq_len = max_seq_len
        if hasattr(processor, "doc_stride"):
            assert doc_stride < max_seq_len, \
                "doc_stride is longer than max_seq_len. This means that there will be gaps " \
                "as the passage windows slide, causing the model to skip over parts of the document. " \
                "Please set a lower value for doc_stride (Suggestions: doc_stride=128, max_seq_len=384) "
            processor.doc_stride = doc_stride

    # b) or from remote transformers model hub
    else:
        logger.info(f"Could not find `{model_name_or_path}` locally. Try to download from model hub ...")
        if not task_type:
            raise ValueError(
                "Please specify the 'task_type' of the model you want to load from transformers. "
                "Valid options for arg `task_type`:"
                "'question_answering', 'embeddings', 'text_classification', 'ner'")

        model = AdaptiveModel.convert_from_transformers(model_name_or_path, device, task_type)
        config = AutoConfig.from_pretrained(model_name_or_path)
        tokenizer = Tokenizer.load(
            model_name_or_path,
            tokenizer_class=tokenizer_class,
            use_fast=use_fast,
            **tokenizer_args,
        )

        # TODO infer task_type automatically from config (if possible)
        if task_type == "question_answering":
            processor = SquadProcessor(
                tokenizer=tokenizer,
                max_seq_len=max_seq_len,
                label_list=["start_token", "end_token"],
                metric="squad",
                data_dir="data",
                doc_stride=doc_stride)
        elif task_type == "embeddings":
            processor = InferenceProcessor(tokenizer=tokenizer, max_seq_len=max_seq_len)
        elif task_type == "text_classification":
            label_list = list(config.id2label[id] for id in range(len(config.id2label)))
            processor = TextClassificationProcessor(
                tokenizer=tokenizer,
                max_seq_len=max_seq_len,
                data_dir="data",
                label_list=label_list,
                label_column_name="label",
                metric="acc",
                quote_char='"',
            )
        elif task_type == "ner":
            label_list = list(config.label2id.keys())
            processor = NERProcessor(tokenizer=tokenizer,
                                     max_seq_len=max_seq_len,
                                     data_dir="data",
                                     metric="seq_f1",
                                     label_list=label_list)
        else:
            raise ValueError(f"`task_type` {task_type} is not supported yet. "
                             f"Valid options for arg `task_type`: 'question_answering', "
                             f"'embeddings', 'text_classification', 'ner'")

    if not isinstance(model, ONNXAdaptiveModel):
        model, _ = optimize_model(model=model, device=device, local_rank=-1, optimizer=None)
    return cls(model,
               processor,
               task_type=task_type,
               batch_size=batch_size,
               gpu=gpu,
               name=name,
               return_class_probs=return_class_probs,
               extraction_strategy=extraction_strategy,
               extraction_layer=extraction_layer,
               s3e_stats=s3e_stats,
               num_processes=num_processes,
               disable_tqdm=disable_tqdm,
               benchmarking=benchmarking,
               dummy_ph=dummy_ph)
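# A short usage sketch for Inferencer.load() as documented above: load a public
# QA model from the model hub, run inference, and explicitly close the
# multiprocessing pool as the num_processes docs require. The input dict reuses
# the "qas"/"context" format shown elsewhere in this file.
qa_inferencer = Inferencer.load(
    "deepset/roberta-base-squad2",   # public model hub name
    task_type="question_answering",  # required when loading from the hub
    batch_size=16,
    gpu=True,
    max_seq_len=384,
    doc_stride=128,
)
dicts = [{"qas": ["In what country is Normandy located?"],
          "context": "The Normans gave their name to Normandy, a region in France."}]
predictions = qa_inferencer.inference_from_dicts(dicts=dicts)
qa_inferencer.close_multiprocessing_pool()  # don't rely on the garbage collector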
def test_qa(caplog):
    caplog.set_level(logging.CRITICAL)
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=False)
    batch_size = 2
    n_epochs = 1
    evaluate_every = 4
    base_LM_model = "bert-base-cased"

    tokenizer = Tokenizer.load(pretrained_model_name_or_path=base_LM_model,
                               do_lower_case=False)
    label_list = ["start_token", "end_token"]
    processor = SquadProcessor(tokenizer=tokenizer,
                               max_seq_len=20,
                               doc_stride=10,
                               max_query_length=6,
                               train_filename="train-sample.json",
                               dev_filename="dev-sample.json",
                               test_filename=None,
                               data_dir="samples/qa",
                               label_list=label_list,
                               metric="squad")
    data_silo = DataSilo(processor=processor, batch_size=batch_size)
    language_model = LanguageModel.load(base_LM_model)
    prediction_head = QuestionAnsweringHead(layer_dims=[768, len(label_list)])
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )
    optimizer, warmup_linear = initialize_optimizer(
        model=model,
        learning_rate=1e-5,
        warmup_proportion=0.2,
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
    )
    trainer = Trainer(
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        warmup_linear=warmup_linear,
        evaluate_every=evaluate_every,
        device=device,
    )
    model = trainer.train(model)
    save_dir = "testsave/qa"
    model.save(save_dir)
    processor.save(save_dir)

    QA_input = [{
        "questions": ["In what country is Normandy located?"],
        "text": 'The Normans gave their name to Normandy, a region in France.',
    }]

    model = Inferencer.load(save_dir)
    result = model.inference_from_dicts(dicts=QA_input, use_multiprocessing=False)
    assert isinstance(result[0]["predictions"][0]["answers"][0]["offset_start"], int)
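# The assert above probes the nested prediction structure that
# inference_from_dicts() returns for QA. A sketch of reading out the top
# answer - only the keys used in the asserts in this file are certain;
# "answer" and "score" are typical fields but may differ by FARM version:
preds = result[0]["predictions"]     # one entry per input document
top_answer = preds[0]["answers"][0]  # answer candidates, best first
print(top_answer["offset_start"])    # character offset into the passage
print(top_answer.get("answer"), top_answer.get("score"))  # assumed fields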
# NOTE: this is an excerpt of a longer script; save_dir, base_LM_model,
# train_filename, dev_filename and batch_size are defined further up in the
# original file.
full_predictions_file = save_dir + "/full_predictions.json"
inference_multiprocessing = False
train = False
inference = True

if train:
    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=base_LM_model)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    label_list = ["start_token", "end_token"]
    metric = "squad"
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=384,
        label_list=label_list,
        metric=metric,
        train_filename=train_filename,
        dev_filename=dev_filename,
        test_filename=None,
        data_dir="../data/squad20",
    )

    # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    # and calculates a few descriptive statistics of our datasets
    data_silo = DataSilo(processor=processor, batch_size=batch_size, distributed=False)

    # 4. Create an AdaptiveModel
    # a) which consists of a pretrained language model as a basis
    language_model = LanguageModel.load(base_LM_model)
    # b) and a prediction head on top that is suited for our task => Question Answering
    prediction_head = QuestionAnsweringHead(layer_dims=[768, len(label_list)])