Example #1
# Imports assumed for a recent FARM version; in particular, Converter is expected
# to live in farm.conversion.transformers.
from pathlib import Path
import pprint

from transformers import pipeline

from farm.conversion.transformers import Converter
from farm.data_handler.processor import Processor
from farm.modeling.adaptive_model import AdaptiveModel


def convert_to_transformers():
    farm_input_dir = Path(
        "../saved_models/farm-bert-base-german-cased-hatespeech-GermEval18Coarse"
    )
    transformers_output_dir = "../saved_models/bert-base-german-cased-hatespeech-GermEval18Coarse"
    # load from FARM format
    model = AdaptiveModel.load(farm_input_dir, device="cpu")
    processor = Processor.load_from_dir(farm_input_dir)
    model.connect_heads_with_processor(processor.tasks)

    # convert to transformers
    transformer_model = Converter.convert_to_transformers(model)[0]
    # Alternative way to convert to transformers:
    # transformer_model = model.convert_to_transformers()[0]

    # save it (note: transformers use str instead of Path objects)
    Path(transformers_output_dir).mkdir(parents=True, exist_ok=True)
    transformer_model.save_pretrained(transformers_output_dir)
    processor.tokenizer.save_pretrained(transformers_output_dir)

    # run predictions (using transformers)
    nlp = pipeline('sentiment-analysis',
                   model=str(transformers_output_dir),
                   tokenizer=str(transformers_output_dir))
    res = nlp("Was ein scheiß Nazi!")
    pprint.pprint(res)
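For the opposite direction (loading a transformers checkpoint into FARM), the pattern below is only a sketch: the hub model name, output path, and the assumption that "text_classification" is a supported task_type for convert_from_transformers are mine, not part of the example above.

# Sketch of the reverse conversion (transformers -> FARM); names/paths are placeholders
farm_model = AdaptiveModel.convert_from_transformers(
    "deepset/bert-base-german-cased-hatespeech-GermEval18Coarse",
    device="cpu",
    task_type="text_classification",
)
farm_processor = Processor.convert_from_transformers(
    "deepset/bert-base-german-cased-hatespeech-GermEval18Coarse",
    task_type="text_classification",
    max_seq_len=256,
    doc_stride=128,
)
farm_model.save("../saved_models/farm-reconverted")
farm_processor.save("../saved_models/farm-reconverted")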
Example #2
    def __init__(self, load_dir, batch_size=4, gpu=False):
        """
        Initializes inferencer from directory with saved model.
        :param load_dir: Directory containing a saved AdaptiveModel
        :type load_dir: str
        :param batch_size: Number of samples computed once per batch
        :type batch_size: int
        :param gpu: If GPU shall be used
        :type gpu: bool
        """
        # Init device and distributed settings
        device, n_gpu = initialize_device_settings(
            use_cuda=gpu, local_rank=-1, fp16=False
        )

        self.processor = Processor.load_from_dir(load_dir)
        self.model = AdaptiveModel.load(load_dir, device)
        self.model.eval()
        self.batch_size = batch_size
        self.device = device
        self.language = self.model.language_model.language
        # TODO adjust for multiple prediction heads
        if len(self.model.prediction_heads) == 1:
            self.prediction_type = self.model.prediction_heads[0].model_type
            self.label_map = self.processor.label_maps[0]
        elif len(self.model.prediction_heads) == 0:
            self.prediction_type = "embedder"
        self.name = os.path.basename(load_dir)
        set_all_seeds(42, n_gpu)
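A minimal usage sketch for this constructor, assuming it belongs to FARM's Inferencer class; the model directory is a placeholder for any directory written by model.save() / processor.save().

# Sketch (placeholder path): build an inferencer from a saved FARM model directory
inferencer = Inferencer("saved_models/bert-german-doc-tone", batch_size=8, gpu=False)
print(inferencer.name, inferencer.language, inferencer.prediction_type)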
Example #3
    def load(cls, load_dir, batch_size=4, gpu=False, embedder_only=False):
        """
        Initializes inferencer from directory with saved model.

        :param load_dir: Directory where the saved model is located.
        :type load_dir: str
        :param batch_size: Number of samples computed once per batch
        :type batch_size: int
        :param gpu: If GPU shall be used
        :type gpu: bool
        :param embedder_only: If true, a faster processor (InferenceProcessor) is loaded. This should only be used
        for extracting embeddings (no downstream predictions).
        :type embedder_only: bool
        :return: An instance of the Inferencer.
        """

        device, n_gpu = initialize_device_settings(
            use_cuda=gpu, local_rank=-1, fp16=False
        )

        model = AdaptiveModel.load(load_dir, device)
        if embedder_only:
            # model.prediction_heads = []
            processor = InferenceProcessor.load_from_dir(load_dir)
        else:
            processor = Processor.load_from_dir(load_dir)

        name = os.path.basename(load_dir)
        return cls(model, processor, batch_size=batch_size, gpu=gpu, name=name)
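Assuming this is a classmethod on FARM's Inferencer, the embedder_only switch would be used roughly as below (the path is a placeholder).

# Sketch: load with the lightweight InferenceProcessor for embedding extraction only
embedder = Inferencer.load("saved_models/bert-base-german-cased", gpu=False, embedder_only=True)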
Example #4
    def load(
        cls,
        load_dir,
        batch_size=4,
        gpu=False,
        embedder_only=False,
        return_class_probs=False,
        strict=True
    ):
        """
        Initializes Inferencer from directory with saved model.

        :param load_dir: Directory where the saved model is located.
        :type load_dir: str
        :param batch_size: Number of samples computed once per batch
        :type batch_size: int
        :param gpu: If GPU shall be used
        :type gpu: bool
        :param embedder_only: If true, a faster processor (InferenceProcessor) is loaded. This should only be used for extracting embeddings (no downstream predictions).
        :type embedder_only: bool
        :param strict: whether to strictly enforce that the keys loaded from saved model match the ones in
                       the PredictionHead (see torch.nn.module.load_state_dict()).
                       Set to `False` for backwards compatibility with PHs saved with older version of FARM.
        :type strict: bool
        :return: An instance of the Inferencer.

        """

        device, n_gpu = initialize_device_settings(use_cuda=gpu, local_rank=-1, fp16=False)

        model = AdaptiveModel.load(load_dir, device, strict=strict)
        if embedder_only:
            # model.prediction_heads = []
            processor = InferenceProcessor.load_from_dir(load_dir)
        else:
            processor = Processor.load_from_dir(load_dir)

        name = os.path.basename(load_dir)
        return cls(
            model,
            processor,
            batch_size=batch_size,
            gpu=gpu,
            name=name,
            return_class_probs=return_class_probs,
        )
Example #5
    def __init__(self, load_dir, batch_size=4, gpu=False):
        # Init device and distributed settings
        device, n_gpu = initialize_device_settings(use_cuda=gpu,
                                                   local_rank=-1,
                                                   fp16=False)

        self.processor = Processor.load_from_dir(load_dir)
        self.model = AdaptiveModel.load(load_dir, device)
        self.model.eval()
        self.batch_size = batch_size
        self.device = device
        self.language = self.model.language_model.language
        # TODO adjust for multiple prediction heads
        self.prediction_type = self.model.prediction_heads[0].model_type
        self.name = os.path.basename(load_dir)
        self.label_map = self.processor.label_maps[0]
        set_all_seeds(42, 1)
Example #6
    def load(cls, load_dir, batch_size=4, gpu=False):
        """
        Initializes inferencer from directory with saved model.
        :param load_dir: Directory where the saved model is located.
        :type load_dir: str
        :param batch_size: Number of samples computed once per batch
        :type batch_size: int
        :param gpu: If GPU shall be used
        :type gpu: bool
        :return: An instance of the Inferencer.
        """

        device, n_gpu = initialize_device_settings(use_cuda=gpu,
                                                   local_rank=-1,
                                                   fp16=False)

        model = AdaptiveModel.load(load_dir, device)
        processor = Processor.load_from_dir(load_dir)
        name = os.path.basename(load_dir)
        return cls(model, processor, batch_size=batch_size, gpu=gpu, name=name)
Example #7
File: infer.py Project: wwmmqq/FARM
    def load(cls,
             model_name_or_path,
             batch_size=4,
             gpu=False,
             task_type=None,
             return_class_probs=False,
             strict=True,
             max_seq_len=256):
        """
        Load an Inferencer incl. all relevant components (model, tokenizer, processor ...) either by

        1. specifying a public name from transformers' model hub (https://huggingface.co/models)
        2. or pointing to a local directory it is saved in.

        :param model_name_or_path: Local directory or public name of the model to load.
        :type model_name_or_path: str
        :param batch_size: Number of samples computed once per batch
        :type batch_size: int
        :param gpu: If GPU shall be used
        :type gpu: bool
        :param task_type: Type of task the model should be used for. Currently supporting:
                          "embeddings", "question_answering", "text_classification". More coming soon...
        :type task_type: str
        :param strict: whether to strictly enforce that the keys loaded from saved model match the ones in
                       the PredictionHead (see torch.nn.module.load_state_dict()).
                       Set to `False` for backwards compatibility with PHs saved with older version of FARM.
        :type strict: bool
        :return: An instance of the Inferencer.

        """

        device, n_gpu = initialize_device_settings(use_cuda=gpu,
                                                   local_rank=-1,
                                                   use_amp=None)
        name = os.path.basename(model_name_or_path)

        # a) either from local dir
        if os.path.exists(model_name_or_path):
            model = AdaptiveModel.load(model_name_or_path,
                                       device,
                                       strict=strict)
            if task_type == "embeddings":
                processor = InferenceProcessor.load_from_dir(
                    model_name_or_path)
            else:
                processor = Processor.load_from_dir(model_name_or_path)

        # b) or from remote transformers model hub
        else:
            logger.info(
                f"Could not find `{model_name_or_path}` locally. Try to download from model hub ..."
            )
            if not task_type:
                raise ValueError(
                    "Please specify the 'task_type' of the model you want to load from transformers. "
                    "Valid options for arg `task_type`:"
                    "'question_answering', 'embeddings', 'text_classification'"
                )

            model = AdaptiveModel.convert_from_transformers(
                model_name_or_path, device, task_type)
            config = AutoConfig.from_pretrained(model_name_or_path)
            tokenizer = Tokenizer.load(model_name_or_path)

            # TODO infer task_type automatically from config (if possible)
            if task_type == "question_answering":
                processor = SquadProcessor(
                    tokenizer=tokenizer,
                    max_seq_len=max_seq_len,
                    label_list=["start_token", "end_token"],
                    metric="squad",
                    data_dir=None,
                )
            elif task_type == "embeddings":
                processor = InferenceProcessor(tokenizer=tokenizer,
                                               max_seq_len=max_seq_len)

            elif task_type == "text_classification":
                label_list = list(config.id2label[id]
                                  for id in range(len(config.id2label)))
                processor = TextClassificationProcessor(
                    tokenizer=tokenizer,
                    max_seq_len=max_seq_len,
                    data_dir=None,
                    label_list=label_list,
                    label_column_name="label",
                    metric="acc",
                    quote_char='"',
                )

            # elif task_type == "multilabel-classification":
            #     # label_list = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
            #     label_list = list(config.label2id.keys())
            #
            #     processor = TextClassificationProcessor(tokenizer=tokenizer,
            #                                             max_seq_len=max_seq_len,
            #                                             data_dir=None,
            #                                             label_list=label_list,
            #                                             label_column_name="label",
            #                                             metric="acc",
            #                                             quote_char='"',
            #                                             multilabel=True,
            #                                             )

            elif task_type == "ner":
                label_list = list(config.label2id.keys())
                processor = NERProcessor(tokenizer=tokenizer,
                                         max_seq_len=max_seq_len,
                                         data_dir=None,
                                         metric="seq_f1",
                                         label_list=label_list)
            else:
                raise ValueError(
                    f"`task_type` {task_type} is not supported yet. "
                    f"Valid options for arg `task_type`: 'question_answering', 'embeddings', 'text_classification'"
                )

        return cls(
            model,
            processor,
            batch_size=batch_size,
            gpu=gpu,
            name=name,
            return_class_probs=return_class_probs,
        )
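A hedged usage sketch for this variant of load(): pulling a public text classification model from the hub. The model name and the inference_from_dicts() call are assumptions based on FARM's usual API, not taken from the example above.

# Sketch: load a hub model by name and classify a few texts
nlp = Inferencer.load("distilbert-base-uncased-finetuned-sst-2-english",
                      task_type="text_classification",
                      batch_size=4,
                      gpu=False)
result = nlp.inference_from_dicts(dicts=[{"text": "FARM makes inference straightforward."},
                                         {"text": "This release is a disaster."}])
print(result)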
Example #8
def run_experiment(args):

    logger.info("\n***********************************************"
                f"\n************* Experiment: {args.task.name} ************"
                "\n************************************************")
    ml_logger = MlLogger(tracking_uri=args.logging.mlflow_url)
    ml_logger.init_experiment(
        experiment_name=args.logging.mlflow_experiment,
        run_name=args.logging.mlflow_run_name,
        nested=args.logging.mlflow_nested,
    )

    validate_args(args)
    distributed = bool(args.general.local_rank != -1)

    # Init device and distributed settings
    device, n_gpu = initialize_device_settings(
        use_cuda=args.general.cuda,
        local_rank=args.general.local_rank,
        fp16=args.general.fp16,
    )

    args.parameter.batch_size = int(args.parameter.batch_size //
                                    args.parameter.gradient_accumulation_steps)
    if n_gpu > 1:
        args.parameter.batch_size = args.parameter.batch_size * n_gpu
    set_all_seeds(args.general.seed)

    # Prepare Data
    tokenizer = Tokenizer.load(args.parameter.model,
                               do_lower_case=args.parameter.lower_case)

    processor = Processor.load(
        tokenizer=tokenizer,
        max_seq_len=args.parameter.max_seq_len,
        data_dir=args.general.data_dir,
        **args.task.toDict(),  # args is of type DotMap and needs conversion to std python dicts
    )

    data_silo = DataSilo(
        processor=processor,
        batch_size=args.parameter.batch_size,
        distributed=distributed,
    )

    class_weights = None
    if args.parameter.balance_classes:
        task_names = list(processor.tasks.keys())
        if len(task_names) > 1:
            raise NotImplementedError(
                f"Balancing classes is currently not supported for multitask experiments. Got tasks:  {task_names} "
            )
        class_weights = data_silo.calculate_class_weights(
            task_name=task_names[0])

    model = get_adaptive_model(
        lm_output_type=args.parameter.lm_output_type,
        prediction_heads=args.parameter.prediction_head,
        layer_dims=args.parameter.layer_dims,
        model=args.parameter.model,
        device=device,
        class_weights=class_weights,
        embeds_dropout_prob=args.parameter.embeds_dropout_prob,
    )

    # Init optimizer

    # TODO: warmup linear is sometimes NONE depending on fp16 - is there a neater way to handle this?
    optimizer, warmup_linear = initialize_optimizer(
        model=model,
        learning_rate=args.parameter.learning_rate,
        warmup_proportion=args.parameter.warmup_proportion,
        loss_scale=args.general.loss_scale,
        fp16=args.general.fp16,
        n_batches=len(data_silo.loaders["train"]),
        grad_acc_steps=args.parameter.gradient_accumulation_steps,
        n_epochs=args.parameter.epochs,
    )

    trainer = Trainer(
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=args.parameter.epochs,
        n_gpu=n_gpu,
        grad_acc_steps=args.parameter.gradient_accumulation_steps,
        fp16=args.general.fp16,
        local_rank=args.general.local_rank,
        warmup_linear=warmup_linear,
        evaluate_every=args.logging.eval_every,
        device=device,
    )

    model = trainer.train(model)

    model_name = (
        f"{model.language_model.name}-{model.language_model.language}-{args.task.name}"
    )
    processor.save(f"{args.general.output_dir}/{model_name}")
    model.save(f"{args.general.output_dir}/{model_name}")
Example #9
    def load(
        cls,
        model_name_or_path,
        revision=None,
        batch_size=4,
        gpu=False,
        task_type=None,
        return_class_probs=False,
        strict=True,
        max_seq_len=256,
        doc_stride=128,
        extraction_layer=None,
        extraction_strategy=None,
        s3e_stats=None,
        num_processes=None,
        disable_tqdm=False,
        tokenizer_class=None,
        use_fast=True,
        tokenizer_args=None,
        multithreading_rust=True,
        dummy_ph=False,
        benchmarking=False,
    ):
        """
        Load an Inferencer incl. all relevant components (model, tokenizer, processor ...) either by

        1. specifying a public name from transformers' model hub (https://huggingface.co/models)
        2. or pointing to a local directory it is saved in.

        :param model_name_or_path: Local directory or public name of the model to load.
        :type model_name_or_path: str
        :param revision: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash.
        :type revision: str
        :param batch_size: Number of samples computed once per batch
        :type batch_size: int
        :param gpu: If GPU shall be used
        :type gpu: bool
        :param task_type: Type of task the model should be used for. Currently supporting:
                          "embeddings", "question_answering", "text_classification", "ner". More coming soon...
        :type task_type: str
        :param strict: whether to strictly enforce that the keys loaded from saved model match the ones in
                       the PredictionHead (see torch.nn.module.load_state_dict()).
                       Set to `False` for backwards compatibility with PHs saved with older version of FARM.
        :type strict: bool
        :param max_seq_len: maximum length of one text sample
        :type max_seq_len: int
        :param doc_stride: Only QA: When input text is longer than max_seq_len it gets split into parts, strided by doc_stride
        :type doc_stride: int
        :param extraction_strategy: Strategy to extract vectors. Choices: 'cls_token' (sentence vector), 'reduce_mean'
                               (sentence vector), reduce_max (sentence vector), 'per_token' (individual token vectors)
        :type extraction_strategy: str
        :param extraction_layer: number of layer from which the embeddings shall be extracted. Default: -1 (very last layer).
        :type extraction_layer: int
        :param s3e_stats: Stats of a fitted S3E model as returned by `fit_s3e_on_corpus()`
                          (only needed for task_type="embeddings" and extraction_strategy = "s3e")
        :type s3e_stats: dict
        :param num_processes: the number of processes for `multiprocessing.Pool`. Set to value of 0 to disable
                              multiprocessing. Set to None to let Inferencer use all CPU cores minus one. If you want to
                              debug the Language Model, you might need to disable multiprocessing!
                              **Warning!** If you use multiprocessing you have to close the
                              `multiprocessing.Pool` again! To do so call
                              :func:`~farm.infer.Inferencer.close_multiprocessing_pool` after you are
                              done using this class. The garbage collector will not do this for you!
        :type num_processes: int
        :param disable_tqdm: Whether to disable tqdm logging (can get very verbose in multiprocessing)
        :type disable_tqdm: bool
        :param tokenizer_class: (Optional) Name of the tokenizer class to load (e.g. `BertTokenizer`)
        :type tokenizer_class: str
        :param use_fast: (Optional, True by default) Indicate if FARM should try to load the fast version of the tokenizer (True) or
            use the Python one (False).
        :type use_fast: bool
        :param tokenizer_args: (Optional) Will be passed to the Tokenizer ``__init__`` method.
            See https://huggingface.co/transformers/main_classes/tokenizer.html and detailed tokenizer documentation
            on `Hugging Face Transformers <https://huggingface.co/transformers/>`_.
        :type tokenizer_args: dict
        :param multithreading_rust: Whether to allow multithreading in Rust, e.g. for FastTokenizers.
                                    Note: Enabling multithreading in Rust AND multiprocessing in python might cause
                                    deadlocks.
        :type multithreading_rust: bool
        :param dummy_ph: If True, methods of the prediction head will be replaced
                             with a dummy method. This is used to isolate lm run time from ph run time.
        :type dummy_ph: bool
        :param benchmarking: If True, a benchmarking object will be initialised within the class and
                             certain parts of the code will be timed for benchmarking. Should be kept
                             False if not benchmarking since these timing checkpoints require synchronization
                             of the asynchronous Pytorch operations and may slow down the model.
        :type benchmarking: bool
        :return: An instance of the Inferencer.

        """
        if tokenizer_args is None:
            tokenizer_args = {}

        device, n_gpu = initialize_device_settings(use_cuda=gpu,
                                                   local_rank=-1,
                                                   use_amp=None)
        name = os.path.basename(model_name_or_path)

        # a) either from local dir
        if os.path.exists(model_name_or_path):
            model = BaseAdaptiveModel.load(load_dir=model_name_or_path,
                                           device=device,
                                           strict=strict)
            if task_type == "embeddings":
                processor = InferenceProcessor.load_from_dir(
                    model_name_or_path)
            else:
                processor = Processor.load_from_dir(model_name_or_path)

        # b) or from remote transformers model hub
        else:
            if not task_type:
                raise ValueError(
                    "Please specify the 'task_type' of the model you want to load from transformers. "
                    "Valid options for arg `task_type`:"
                    "'question_answering', 'embeddings', 'text_classification', 'ner'"
                )

            model = AdaptiveModel.convert_from_transformers(
                model_name_or_path,
                revision=revision,
                device=device,
                task_type=task_type)
            processor = Processor.convert_from_transformers(
                model_name_or_path,
                revision=revision,
                task_type=task_type,
                max_seq_len=max_seq_len,
                doc_stride=doc_stride,
                tokenizer_class=tokenizer_class,
                tokenizer_args=tokenizer_args,
                use_fast=use_fast)

        # override processor attributes loaded from config or HF with inferencer params
        processor.max_seq_len = max_seq_len
        processor.multithreading_rust = multithreading_rust
        if hasattr(processor, "doc_stride"):
            assert doc_stride < max_seq_len, "doc_stride is longer than max_seq_len. This means that there will be gaps " \
                                             "as the passage windows slide, causing the model to skip over parts of the document. " \
                                             "Please set a lower value for doc_stride (Suggestions: doc_stride=128, max_seq_len=384) "
            processor.doc_stride = doc_stride

        return cls(model,
                   processor,
                   task_type=task_type,
                   batch_size=batch_size,
                   gpu=gpu,
                   name=name,
                   return_class_probs=return_class_probs,
                   extraction_strategy=extraction_strategy,
                   extraction_layer=extraction_layer,
                   s3e_stats=s3e_stats,
                   num_processes=num_processes,
                   disable_tqdm=disable_tqdm,
                   benchmarking=benchmarking,
                   dummy_ph=dummy_ph)
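Sketch of the embeddings use case described in the docstring above; the model name and input text are placeholders, and the pool is closed explicitly as the num_processes warning requires.

# Sketch: extract sentence embeddings, then close the multiprocessing pool
embedder = Inferencer.load("deepset/sentence_bert",
                           task_type="embeddings",
                           extraction_strategy="cls_token",
                           extraction_layer=-1,
                           gpu=False)
vectors = embedder.inference_from_dicts(dicts=[{"text": "FARM can extract embeddings."}])
embedder.close_multiprocessing_pool()  # the garbage collector will not close the pool for you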
Example #10
    def convert_to_onnx(cls,
                        model_name,
                        output_path,
                        task_type,
                        convert_to_float16=False,
                        quantize=False,
                        opset_version=11):
        """
        Convert a PyTorch model from transformers hub to an ONNX Model.

        :param model_name: transformers model name
        :type model_name: str
        :param output_path: output Path to write the converted model to
        :type output_path: Path
        :param task_type: Type of task for the model. Available options: "embeddings", "question_answering",
                          "text_classification", "ner".
        :param convert_to_float16: By default, the model uses float32 precision. With half precision (float16), inference
                                should be faster on Nvidia GPUs with Tensor Cores, such as the T4 or V100. On older GPUs, float32
                                might be more performant.
        :type convert_to_float16: bool
        :param quantize: convert floating point numbers to integers
        :type quantize: bool
        :param opset_version: ONNX opset version
        :type opset_version: int
        :return:
        """
        language_model_class = LanguageModel.get_language_model_class(
            model_name)
        if language_model_class not in ["Bert", "Roberta", "XLMRoberta"]:
            raise Exception(
                "The current ONNX conversion only supports 'BERT', 'RoBERTa', and 'XLMRoberta' models."
            )

        task_type_to_pipeline_map = {
            "question_answering": "question-answering",
            "embeddings": "feature-extraction",
            "ner": "ner"
        }

        convert(pipeline_name=task_type_to_pipeline_map[task_type],
                framework="pt",
                model=model_name,
                output=output_path / "model.onnx",
                opset=opset_version,
                use_external_format=(language_model_class == "XLMRoberta"))

        # save processor & model config files that are needed when loading the model with the FARM Inferencer
        processor = Processor.convert_from_transformers(
            tokenizer_name_or_path=model_name,
            task_type=task_type,
            max_seq_len=256,
            doc_stride=128,
            use_fast=True)
        processor.save(output_path)
        model = AdaptiveModel.convert_from_transformers(model_name,
                                                        device="cpu",
                                                        task_type=task_type)
        model.save(output_path)
        os.remove(
            output_path / "language_model.bin"
        )  # remove the actual PyTorch model (only configs are required)

        onnx_model_config = {
            "task_type": task_type,
            "onnx_opset_version": opset_version,
            "language_model_class": language_model_class,
            "language": model.language_model.language
        }
        with open(output_path / "onnx_model_config.json", "w") as f:
            json.dump(onnx_model_config, f)

        if convert_to_float16:
            from onnxruntime_tools import optimizer
            config = AutoConfig.from_pretrained(model_name)
            optimized_model = optimizer.optimize_model(
                input=str(output_path / "model.onnx"),
                model_type='bert',
                num_heads=config.num_attention_heads,
                hidden_size=config.hidden_size)
            optimized_model.convert_model_float32_to_float16()
            optimized_model.save_model_to_file(str(output_path / "model.onnx"))

        if quantize:
            quantize_model(output_path / "model.onnx")
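In FARM this conversion is typically exposed as AdaptiveModel.convert_to_onnx(), and the exported directory can then be loaded with the Inferencer; a sketch under that assumption (paths and model name are placeholders):

# Sketch: convert a hub QA model to ONNX, then load the exported directory for inference
from pathlib import Path

onnx_dir = Path("saved_models/onnx-bert-base-cased-squad2")
AdaptiveModel.convert_to_onnx(model_name="deepset/bert-base-cased-squad2",
                              output_path=onnx_dir,
                              task_type="question_answering")
onnx_qa = Inferencer.load(str(onnx_dir), task_type="question_answering", num_processes=0)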
Example #11
File: infer.py Project: yon606/FARM
    def load(cls,
             model_name_or_path,
             batch_size=4,
             gpu=False,
             task_type=None,
             return_class_probs=False,
             strict=True,
             max_seq_len=256,
             doc_stride=128,
             extraction_layer=None,
             extraction_strategy=None,
             s3e_stats=None,
             num_processes=None,
             disable_tqdm=False):
        """
        Load an Inferencer incl. all relevant components (model, tokenizer, processor ...) either by

        1. specifying a public name from transformers' model hub (https://huggingface.co/models)
        2. or pointing to a local directory it is saved in.

        :param model_name_or_path: Local directory or public name of the model to load.
        :type model_name_or_path: str
        :param batch_size: Number of samples computed once per batch
        :type batch_size: int
        :param gpu: If GPU shall be used
        :type gpu: bool
        :param task_type: Type of task the model should be used for. Currently supporting:
                          "embeddings", "question_answering", "text_classification", "ner". More coming soon...
        :type task_type: str
        :param strict: whether to strictly enforce that the keys loaded from saved model match the ones in
                       the PredictionHead (see torch.nn.module.load_state_dict()).
                       Set to `False` for backwards compatibility with PHs saved with older version of FARM.
        :type strict: bool
        :param max_seq_len: maximum length of one text sample
        :type max_seq_len: int
        :param doc_stride: Only QA: When input text is longer than max_seq_len it gets split into parts, strided by doc_stride
        :type doc_stride: int
        :param extraction_strategy: Strategy to extract vectors. Choices: 'cls_token' (sentence vector), 'reduce_mean'
                               (sentence vector), reduce_max (sentence vector), 'per_token' (individual token vectors)
        :type extraction_strategy: str
        :param extraction_layer: number of layer from which the embeddings shall be extracted. Default: -1 (very last layer).
        :type extraction_layer: int
        :param s3e_stats: Stats of a fitted S3E model as returned by `fit_s3e_on_corpus()`
                          (only needed for task_type="embeddings" and extraction_strategy = "s3e")
        :type s3e_stats: dict
        :param num_processes: the number of processes for `multiprocessing.Pool`. Set to value of 0 to disable
                              multiprocessing. Set to None to let Inferencer use all CPU cores. If you want to
                              debug the Language Model, you might need to disable multiprocessing!
        :type num_processes: int
        :param disable_tqdm: Whether to disable tqdm logging (can get very verbose in multiprocessing)
        :type disable_tqdm: bool
        :return: An instance of the Inferencer.

        """

        device, n_gpu = initialize_device_settings(use_cuda=gpu,
                                                   local_rank=-1,
                                                   use_amp=None)
        name = os.path.basename(model_name_or_path)

        # a) either from local dir
        if os.path.exists(model_name_or_path):
            model = BaseAdaptiveModel.load(load_dir=model_name_or_path,
                                           device=device,
                                           strict=strict)
            if task_type == "embeddings":
                processor = InferenceProcessor.load_from_dir(
                    model_name_or_path)
            else:
                processor = Processor.load_from_dir(model_name_or_path)

            # override processor attributes loaded from config file with inferencer params
            processor.max_seq_len = max_seq_len
            if hasattr(processor, "doc_stride"):
                processor.doc_stride = doc_stride

        # b) or from remote transformers model hub
        else:
            logger.info(
                f"Could not find `{model_name_or_path}` locally. Try to download from model hub ..."
            )
            if not task_type:
                raise ValueError(
                    "Please specify the 'task_type' of the model you want to load from transformers. "
                    "Valid options for arg `task_type`:"
                    "'question_answering', 'embeddings', 'text_classification', 'ner'"
                )

            model = AdaptiveModel.convert_from_transformers(
                model_name_or_path, device, task_type)
            config = AutoConfig.from_pretrained(model_name_or_path)
            tokenizer = Tokenizer.load(model_name_or_path)

            # TODO infer task_type automatically from config (if possible)
            if task_type == "question_answering":
                processor = SquadProcessor(
                    tokenizer=tokenizer,
                    max_seq_len=max_seq_len,
                    label_list=["start_token", "end_token"],
                    metric="squad",
                    data_dir="data",
                    doc_stride=doc_stride)
            elif task_type == "embeddings":
                processor = InferenceProcessor(tokenizer=tokenizer,
                                               max_seq_len=max_seq_len)

            elif task_type == "text_classification":
                label_list = list(config.id2label[id]
                                  for id in range(len(config.id2label)))
                processor = TextClassificationProcessor(
                    tokenizer=tokenizer,
                    max_seq_len=max_seq_len,
                    data_dir="data",
                    label_list=label_list,
                    label_column_name="label",
                    metric="acc",
                    quote_char='"',
                )
            elif task_type == "ner":
                label_list = list(config.label2id.keys())
                processor = NERProcessor(tokenizer=tokenizer,
                                         max_seq_len=max_seq_len,
                                         data_dir="data",
                                         metric="seq_f1",
                                         label_list=label_list)
            else:
                raise ValueError(
                    f"`task_type` {task_type} is not supported yet. "
                    f"Valid options for arg `task_type`: 'question_answering', "
                    f"'embeddings', 'text_classification', 'ner'")

        return cls(model,
                   processor,
                   task_type=task_type,
                   batch_size=batch_size,
                   gpu=gpu,
                   name=name,
                   return_class_probs=return_class_probs,
                   extraction_strategy=extraction_strategy,
                   extraction_layer=extraction_layer,
                   s3e_stats=s3e_stats,
                   num_processes=num_processes,
                   disable_tqdm=disable_tqdm)
Example #12
    def load(
        cls,
        model_name_or_path,
        batch_size=4,
        gpu=False,
        task_type=None,
        return_class_probs=False,
        strict=True,
        max_seq_len=256,
        doc_stride=128,
        extraction_layer=None,
        extraction_strategy=None,
        s3e_stats=None,
        num_processes=None,
        disable_tqdm=False,
        tokenizer_class=None,
        use_fast=False,
        tokenizer_args=None,
        dummy_ph=False,
        benchmarking=False,
    ):
        """
        Load an Inferencer incl. all relevant components (model, tokenizer, processor ...) either by

        1. specifying a public name from transformers' model hub (https://huggingface.co/models)
        2. or pointing to a local directory it is saved in.

        :param model_name_or_path: Local directory or public name of the model to load.
        :type model_name_or_path: str
        :param batch_size: Number of samples computed once per batch
        :type batch_size: int
        :param gpu: If GPU shall be used
        :type gpu: bool
        :param task_type: Type of task the model should be used for. Currently supporting:
                          "embeddings", "question_answering", "text_classification", "ner". More coming soon...
        :type task_type: str
        :param strict: whether to strictly enforce that the keys loaded from saved model match the ones in
                       the PredictionHead (see torch.nn.module.load_state_dict()).
                       Set to `False` for backwards compatibility with PHs saved with older version of FARM.
        :type strict: bool
        :param max_seq_len: maximum length of one text sample
        :type max_seq_len: int
        :param doc_stride: Only QA: When input text is longer than max_seq_len it gets split into parts, strided by doc_stride
        :type doc_stride: int
        :param extraction_strategy: Strategy to extract vectors. Choices: 'cls_token' (sentence vector), 'reduce_mean'
                               (sentence vector), reduce_max (sentence vector), 'per_token' (individual token vectors)
        :type extraction_strategy: str
        :param extraction_layer: number of layer from which the embeddings shall be extracted. Default: -1 (very last layer).
        :type extraction_layer: int
        :param s3e_stats: Stats of a fitted S3E model as returned by `fit_s3e_on_corpus()`
                          (only needed for task_type="embeddings" and extraction_strategy = "s3e")
        :type s3e_stats: dict
        :param num_processes: the number of processes for `multiprocessing.Pool`. Set to value of 0 to disable
                              multiprocessing. Set to None to let Inferencer use all CPU cores minus one. If you want to
                              debug the Language Model, you might need to disable multiprocessing!
                              **Warning!** If you use multiprocessing you have to close the
                              `multiprocessing.Pool` again! To do so call
                              :func:`~farm.infer.Inferencer.close_multiprocessing_pool` after you are
                              done using this class. The garbage collector will not do this for you!
        :type num_processes: int
        :param disable_tqdm: Whether to disable tqdm logging (can get very verbose in multiprocessing)
        :type disable_tqdm: bool
        :param tokenizer_class: (Optional) Name of the tokenizer class to load (e.g. `BertTokenizer`)
        :type tokenizer_class: str
        :param use_fast: (Optional, False by default) Indicate if FARM should try to load the fast version of the tokenizer (True) or
            use the Python one (False).
        :type use_fast: bool
        :param tokenizer_args: (Optional) Will be passed to the Tokenizer ``__init__`` method.
            See https://huggingface.co/transformers/main_classes/tokenizer.html and detailed tokenizer documentation
            on `Hugging Face Transformers <https://huggingface.co/transformers/>`_.
        :type tokenizer_args: dict
        :param dummy_ph: If True, methods of the prediction head will be replaced
                             with a dummy method. This is used to isolate lm run time from ph run time.
        :type dummy_ph: bool
        :param benchmarking: If True, a benchmarking object will be initialised within the class and
                             certain parts of the code will be timed for benchmarking. Should be kept
                             False if not benchmarking since these timing checkpoints require synchronization
                             of the asynchronous Pytorch operations and may slow down the model.
        :type benchmarking: bool
        :return: An instance of the Inferencer.

        """
        if tokenizer_args is None:
            tokenizer_args = {}

        device, n_gpu = initialize_device_settings(use_cuda=gpu,
                                                   local_rank=-1,
                                                   use_amp=None)
        name = os.path.basename(model_name_or_path)

        # a) either from local dir
        if os.path.exists(model_name_or_path):
            model = BaseAdaptiveModel.load(load_dir=model_name_or_path,
                                           device=device,
                                           strict=strict)
            if task_type == "embeddings":
                processor = InferenceProcessor.load_from_dir(
                    model_name_or_path)
            else:
                processor = Processor.load_from_dir(model_name_or_path)

            # override processor attributes loaded from config file with inferencer params
            processor.max_seq_len = max_seq_len
            if hasattr(processor, "doc_stride"):
                processor.doc_stride = doc_stride

        # b) or from remote transformers model hub
        else:
            logger.info(
                f"Could not find `{model_name_or_path}` locally. Try to download from model hub ..."
            )
            if not task_type:
                raise ValueError(
                    "Please specify the 'task_type' of the model you want to load from transformers. "
                    "Valid options for arg `task_type`:"
                    "'question_answering', 'embeddings', 'text_classification', 'ner'"
                )

            model = AdaptiveModel.convert_from_transformers(
                model_name_or_path, device, task_type)
            config = AutoConfig.from_pretrained(model_name_or_path)
            tokenizer = Tokenizer.load(
                model_name_or_path,
                tokenizer_class=tokenizer_class,
                use_fast=use_fast,
                **tokenizer_args,
            )

            # TODO infer task_type automatically from config (if possible)
            if task_type == "question_answering":
                processor = SquadProcessor(
                    tokenizer=tokenizer,
                    max_seq_len=max_seq_len,
                    label_list=["start_token", "end_token"],
                    metric="squad",
                    data_dir="data",
                    doc_stride=doc_stride)
            elif task_type == "embeddings":
                processor = InferenceProcessor(tokenizer=tokenizer,
                                               max_seq_len=max_seq_len)

            elif task_type == "text_classification":
                label_list = list(config.id2label[id]
                                  for id in range(len(config.id2label)))
                processor = TextClassificationProcessor(
                    tokenizer=tokenizer,
                    max_seq_len=max_seq_len,
                    data_dir="data",
                    label_list=label_list,
                    label_column_name="label",
                    metric="acc",
                    quote_char='"',
                )
            elif task_type == "ner":
                label_list = list(config.label2id.keys())
                processor = NERProcessor(tokenizer=tokenizer,
                                         max_seq_len=max_seq_len,
                                         data_dir="data",
                                         metric="seq_f1",
                                         label_list=label_list)
            else:
                raise ValueError(
                    f"`task_type` {task_type} is not supported yet. "
                    f"Valid options for arg `task_type`: 'question_answering', "
                    f"'embeddings', 'text_classification', 'ner'")

        if not isinstance(model, ONNXAdaptiveModel):
            model, _ = optimize_model(model=model,
                                      device=device,
                                      local_rank=-1,
                                      optimizer=None)
        return cls(model,
                   processor,
                   task_type=task_type,
                   batch_size=batch_size,
                   gpu=gpu,
                   name=name,
                   return_class_probs=return_class_probs,
                   extraction_strategy=extraction_strategy,
                   extraction_layer=extraction_layer,
                   s3e_stats=s3e_stats,
                   num_processes=num_processes,
                   disable_tqdm=disable_tqdm,
                   benchmarking=benchmarking,
                   dummy_ph=dummy_ph)
Example #13
import os

import yaml

from farm.data_handler.processor import Processor
from farm.modeling.adaptive_model import AdaptiveModel
# import path assumes an older (0.x) Haystack release
from haystack.document_store.faiss import FAISSDocumentStore

dirname = os.path.dirname(__file__)

configFile = os.path.join(dirname, 'config.yaml')

config = None
with open(configFile) as file:
  config = yaml.safe_load(file)

sqlUrlFAQ = config["sqlUrlFAQ"]


model = AdaptiveModel.convert_from_transformers(
    "deepset/sentence_bert", device="cpu", task_type="embeddings")
processor = Processor.convert_from_transformers(
    "deepset/sentence_bert", task_type="embeddings", max_seq_len=384, doc_stride=128)
model.save("sentence_bert-saved")
processor.save("sentence_bert-saved")


document_store = FAISSDocumentStore(sql_url=sqlUrlFAQ)


# from haystack.retriever.dense import DensePassageRetriever
# retriever = DensePassageRetriever(document_store=document_store,
#                                   query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
#                                   passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
#                                   max_seq_len_query=64,
#                                   max_seq_len_passage=256,
#                                   batch_size=16,
#                                   use_gpu=True,
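The saved sentence_bert model would then typically back a Haystack EmbeddingRetriever for FAQ search; a sketch assuming the same old haystack.retriever.dense API as the commented DPR block above:

# Sketch (old Haystack 0.x API assumed): index FAQ embeddings with the saved FARM model
from haystack.retriever.dense import EmbeddingRetriever

retriever = EmbeddingRetriever(document_store=document_store,
                               embedding_model="sentence_bert-saved",
                               model_format="farm",
                               use_gpu=False)
document_store.update_embeddings(retriever)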
Example #14
def run_experiment(args):
    logger.info("\n***********************************************"
                f"\n************* Experiment: {args.task.name} ************"
                "\n************************************************")
    ml_logger = MlLogger(tracking_uri=args.logging.mlflow_url)
    ml_logger.init_experiment(
        experiment_name=args.logging.mlflow_experiment,
        run_name=args.logging.mlflow_run_name,
        nested=args.logging.mlflow_nested,
    )

    validate_args(args)
    distributed = bool(args.general.local_rank != -1)

    # Init device and distributed settings
    device, n_gpu = initialize_device_settings(
        use_cuda=args.general.cuda,
        local_rank=args.general.local_rank,
        use_amp=args.general.use_amp,
    )

    args.parameter.batch_size = int(args.parameter.batch_size //
                                    args.parameter.gradient_accumulation_steps)

    set_all_seeds(args.general.seed)

    # Prepare Data
    tokenizer = Tokenizer.load(args.parameter.model,
                               do_lower_case=args.parameter.lower_case)

    processor = Processor.load(
        tokenizer=tokenizer,
        max_seq_len=args.parameter.max_seq_len,
        data_dir=Path(args.general.data_dir),
        **args.task.toDict(),  # args is of type DotMap and needs conversion to std python dicts
    )

    data_silo = DataSilo(
        processor=processor,
        batch_size=args.parameter.batch_size,
        distributed=distributed,
    )

    class_weights = None
    if args.parameter.balance_classes:
        task_names = list(processor.tasks.keys())
        if len(task_names) > 1:
            raise NotImplementedError(
                f"Balancing classes is currently not supported for multitask experiments. Got tasks:  {task_names} "
            )
        class_weights = data_silo.calculate_class_weights(
            task_name=task_names[0])

    model = get_adaptive_model(
        lm_output_type=args.parameter.lm_output_type,
        prediction_heads=args.parameter.prediction_head,
        layer_dims=args.parameter.layer_dims,
        model=args.parameter.model,
        device=device,
        class_weights=class_weights,
        embeds_dropout_prob=args.parameter.embeds_dropout_prob,
    )

    # Init optimizer
    optimizer_opts = args.optimizer.optimizer_opts.toDict() if args.optimizer.optimizer_opts else None
    schedule_opts = args.optimizer.schedule_opts.toDict() if args.optimizer.schedule_opts else None
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=args.optimizer.learning_rate,
        schedule_opts=schedule_opts,
        optimizer_opts=optimizer_opts,
        use_amp=args.general.use_amp,
        n_batches=len(data_silo.loaders["train"]),
        grad_acc_steps=args.parameter.gradient_accumulation_steps,
        n_epochs=args.parameter.epochs,
        device=device)

    model_name = (
        f"{model.language_model.name}-{model.language_model.language}-{args.task.name}"
    )

    # An early stopping instance can be used to save the model that performs best on the dev set
    # according to some metric and stop training when no improvement is happening for some iterations.
    if "early_stopping" in args:
        early_stopping = EarlyStopping(
            metric=args.task.metric,
            mode=args.early_stopping.mode,
            save_dir=Path(
                f"{args.general.output_dir}/{model_name}_early_stopping"
            ),  # where to save the best model
            patience=args.early_stopping.patience,  # number of evaluations to wait for improvement before terminating the training
        )
    else:
        early_stopping = None

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=args.parameter.epochs,
        n_gpu=n_gpu,
        grad_acc_steps=args.parameter.gradient_accumulation_steps,
        use_amp=args.general.use_amp,
        local_rank=args.general.local_rank,
        lr_schedule=lr_schedule,
        evaluate_every=args.logging.eval_every,
        device=device,
        early_stopping=early_stopping)

    model = trainer.train()

    processor.save(Path(f"{args.general.output_dir}/{model_name}"))
    model.save(Path(f"{args.general.output_dir}/{model_name}"))

    ml_logger.end_run()
Example #15
def run_experiment(args):
    validate_args(args)
    directory_setup(output_dir=args.output_dir, do_train=args.do_train)
    distributed = bool(args.local_rank != -1)

    # Init device and distributed settings
    device, n_gpu = initialize_device_settings(use_cuda=args.cuda,
                                               local_rank=args.local_rank,
                                               fp16=args.fp16)

    args.batch_size = args.batch_size // args.gradient_accumulation_steps
    if n_gpu > 1:
        args.batch_size = args.batch_size * n_gpu
    set_all_seeds(args.seed)

    # Prepare Data
    tokenizer = BertTokenizer.from_pretrained(args.model,
                                              do_lower_case=args.lower_case)
    processor = Processor.load(
        processor_name=args.processor_name,
        tokenizer=tokenizer,
        max_seq_len=args.max_seq_len,
        data_dir=args.data_dir,
    )

    data_silo = DataSilo(processor=processor,
                         batch_size=args.batch_size,
                         distributed=distributed)

    class_weights = None
    if args.balance_classes:
        class_weights = data_silo.class_weights

    model = get_adaptive_model(
        lm_output_type=args.lm_output_type,
        prediction_heads=args.prediction_head,
        layer_dims=args.layer_dims,
        model=args.model,
        device=device,
        class_weights=class_weights,
        fp16=args.fp16,
        embeds_dropout_prob=args.embeds_dropout_prob,
        local_rank=args.local_rank,
        n_gpu=n_gpu,
    )

    # Init optimizer

    # TODO: warmup linear is sometimes NONE depending on fp16 - is there a neater way to handle this?
    optimizer, warmup_linear = initialize_optimizer(
        model=model,
        learning_rate=args.learning_rate,
        warmup_proportion=args.warmup_proportion,
        loss_scale=args.loss_scale,
        fp16=args.fp16,
        n_examples=data_silo.n_samples("train"),
        batch_size=args.batch_size,
        grad_acc_steps=args.gradient_accumulation_steps,
        n_epochs=args.epochs,
    )

    trainer = Trainer(
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=args.epochs,
        n_gpu=n_gpu,
        grad_acc_steps=args.gradient_accumulation_steps,
        fp16=args.fp16,
        warmup_linear=warmup_linear,
        evaluate_every=args.eval_every,
        device=device,
    )

    model = trainer.train(model)

    model_name = (
        f"{model.language_model.name}-{model.language_model.language}-{args.name}"
    )
    processor.save(f"saved_models/{model_name}")
    model.save(f"saved_models/{model_name}")
Example #16
    config = yaml.safe_load(file)

modelName = config["model"]
sqlUrl = config["sqlUrl"]
modelDir = config["modelDir"]
doc_dir = os.path.join(dirname, config["docDir"])

# download and save the model
from farm.modeling.adaptive_model import AdaptiveModel
from farm.data_handler.processor import Processor

model = AdaptiveModel.convert_from_transformers(modelName,
                                                device="cpu",
                                                task_type="question_answering")
processor = Processor.convert_from_transformers(modelName,
                                                task_type="question_answering",
                                                max_seq_len=384,
                                                doc_stride=128)
model.save(modelDir)
processor.save(modelDir)

# try:
#     # Don't rely on the transformers cache (or configure cache_dir in from_pretrained);
#     # the model must be saved explicitly with save() as above. Alternative readers:
#     model = TransformersReader(model_name_or_path=modelName, use_gpu=-1)
#     model = FARMReader(model_name_or_path=modelName,
#                        use_gpu=False, no_ans_boost=0)
# except:
#     pass

document_store = SQLDocumentStore(url=sqlUrl)
# document_store = FAISSDocumentStore(sql_url=sqlUrl)
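As the commented-out lines suggest, the locally saved model can back a Haystack FARMReader; a sketch assuming the old haystack.reader.farm import path:

# Sketch: point the reader at the saved local directory instead of the hub name
from haystack.reader.farm import FARMReader

reader = FARMReader(model_name_or_path=modelDir, use_gpu=False, no_ans_boost=0)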
Example #17
def run_experiment(args):
    validate_args(args)
    distributed = bool(args.general.local_rank != -1)

    # Init device and distributed settings
    device, n_gpu = initialize_device_settings(
        use_cuda=args.general.cuda,
        local_rank=args.general.local_rank,
        fp16=args.general.fp16,
    )

    args.parameter.batch_size = int(args.parameter.batch_size //
                                    args.parameter.gradient_accumulation_steps)
    if n_gpu > 1:
        args.parameter.batch_size = args.parameter.batch_size * n_gpu
    set_all_seeds(args.general.seed)

    # Prepare Data
    tokenizer = BertTokenizer.from_pretrained(
        args.parameter.model, do_lower_case=args.parameter.lower_case)
    # processor = Processor.load(
    #     tokenizer=tokenizer,
    #     max_seq_len=args.parameter.max_seq_len,
    #     data_dir=args.general.data_dir,
    #     train_filename=args.task.train_filename,
    #     dev_filename=args.task.dev_filename,
    #     test_filename=args.task.test_filename,
    #     dev_split=args.task.dev_split,
    #     metrics=args.task.metrics,
    #     **args.task.toDict(),  # args is of type DotMap and needs conversion to std python dicts
    # )
    processor = Processor.load(
        tokenizer=tokenizer,
        max_seq_len=args.parameter.max_seq_len,
        data_dir=args.general.data_dir,
        **args.task.toDict(),  # args is of type DotMap and needs conversion to std python dicts
    )

    data_silo = DataSilo(
        processor=processor,
        batch_size=args.parameter.batch_size,
        distributed=distributed,
    )

    class_weights = None
    if args.parameter.balance_classes:
        class_weights = data_silo.class_weights

    model = get_adaptive_model(
        lm_output_type=args.parameter.lm_output_type,
        prediction_heads=args.parameter.prediction_head,
        layer_dims=args.parameter.layer_dims,
        model=args.parameter.model,
        device=device,
        class_weights=class_weights,
        embeds_dropout_prob=args.parameter.embeds_dropout_prob,
    )

    # Init optimizer

    # TODO: warmup linear is sometimes NONE depending on fp16 - is there a neater way to handle this?
    optimizer, warmup_linear = initialize_optimizer(
        model=model,
        learning_rate=args.parameter.learning_rate,
        warmup_proportion=args.parameter.warmup_proportion,
        loss_scale=args.general.loss_scale,
        fp16=args.general.fp16,
        n_batches=len(data_silo.loaders["train"]),
        grad_acc_steps=args.parameter.gradient_accumulation_steps,
        n_epochs=args.parameter.epochs,
    )

    trainer = Trainer(
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=args.parameter.epochs,
        n_gpu=n_gpu,
        grad_acc_steps=args.parameter.gradient_accumulation_steps,
        fp16=args.general.fp16,
        local_rank=args.general.local_rank,
        warmup_linear=warmup_linear,
        evaluate_every=args.logging.eval_every,
        device=device,
    )

    model = trainer.train(model)

    model_name = (
        f"{model.language_model.name}-{model.language_model.language}-{args.task.name}"
    )
    processor.save(f"{args.general.output_dir}/{model_name}")
    model.save(f"{args.general.output_dir}/{model_name}")