Example #1
def test_onnx_conversion_and_inference(tmp_path, model_name):
    AdaptiveModel.convert_to_onnx(model_name=model_name,
                                  output_path=tmp_path / "test-onnx",
                                  task_type="question_answering")
    onnx_inferencer = Inferencer.load(tmp_path / "test-onnx",
                                      task_type="question_answering",
                                      num_processes=0)
    qa_input = [{
        "questions": ["What is the population of Berlin?"],
        "text":
        "Berlin is the capital and largest city of Germany by both area and population. Its 3,769,495 "
        "inhabitants as of December 31, 2019 make it the most populous city of the European Union, "
        "according to population within city limits.The city is also one of Germany's 16 federal states.",
    }]
    result_onnx = onnx_inferencer.inference_from_dicts(qa_input)[0]
    assert result_onnx["predictions"][0]["answers"][0]["answer"] == "3,769,495"

    pytorch_inferencer = Inferencer.load(model_name,
                                         task_type="question_answering",
                                         num_processes=0)
    result_pytorch = pytorch_inferencer.inference_from_dicts(qa_input)[0]

    for (onnx, pytorch) in zip(
            result_onnx["predictions"][0]["answers"][0].items(),
            result_pytorch["predictions"][0]["answers"][0].items()):
        # keys
        assert onnx[0] == pytorch[0]
        # values
        if isinstance(onnx[1], float):
            np.testing.assert_almost_equal(onnx[1], pytorch[1],
                                           decimal=4)  # score
        else:
            assert onnx[1] == pytorch[1]
Example #2
def test_qa_onnx_inference(caplog=None):
    if caplog:
        caplog.set_level(logging.CRITICAL)

    QA_input = [
        {
            "questions": ["Who counted the game among the best ever made?"],
            "text": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created."
        }]
    base_LM_model = "deepset/bert-base-cased-squad2"

    # Pytorch
    inferencer = Inferencer.load(base_LM_model, batch_size=2, gpu=False, task_type="question_answering",
                                 num_processes=0)
    result = inferencer.inference_from_dicts(dicts=QA_input)[0]

    # ONNX
    onnx_model_export_path = Path("testsave/onnx-export")
    inferencer.model.convert_to_onnx(onnx_model_export_path)
    inferencer = Inferencer.load(model_name_or_path=onnx_model_export_path, task_type="question_answering", num_processes=0)

    result_onnx = inferencer.inference_from_dicts(QA_input)[0]

    for (onnx, regular) in zip(result_onnx["predictions"][0]["answers"][0].items(), result["predictions"][0]["answers"][0].items()):
        # keys
        assert onnx[0] == regular[0]
        # values
        if isinstance(onnx[1], float):
            np.testing.assert_almost_equal(onnx[1], regular[1], decimal=4)  # score
        else:
            assert onnx[1] == regular[1]
Example #3
def test_s3e_fit():
    # small test data
    language_model = Path("samples/s3e/tiny_fasttext_model")
    corpus_path = Path("samples/s3e/tiny_corpus.txt")
    save_dir = Path("testsave/fitted_s3e/")
    do_lower_case = False
    batch_size = 2
    use_gpu = False

    # Fit S3E on a corpus
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=use_gpu, use_amp=False)

    # Create an InferenceProcessor
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=language_model, do_lower_case=do_lower_case)
    processor = InferenceProcessor(tokenizer=tokenizer, max_seq_len=128)

    # Create an AdaptiveModel
    language_model = LanguageModel.load(language_model)

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[],
        embeds_dropout_prob=0.1,
        lm_output_types=[],
        device=device)

    model, processor, s3e_stats = fit_s3e_on_corpus(processor=processor,
                                                    model=model,
                                                    corpus=corpus_path,
                                                    n_clusters=3,
                                                    pca_n_components=30,
                                                    svd_postprocessing=True,
                                                    min_token_occurrences=1)

    # save everything to allow inference without fitting everything again
    model.save(save_dir)
    processor.save(save_dir)
    with open(save_dir / "s3e_stats.pkl", "wb") as f:
        pickle.dump(s3e_stats, f)

    # Load model, tokenizer and processor directly into Inferencer
    inferencer = Inferencer(model=model, processor=processor, task_type="embeddings", gpu=use_gpu,
                            batch_size=batch_size, extraction_strategy="s3e", extraction_layer=-1,
                            s3e_stats=s3e_stats, num_processes=0)

    # Input
    basic_texts = [
        {"text": "a man is walking on the street."},
        {"text": "a woman is walking on the street."},
    ]

    # Get embeddings for input text (you can vary the strategy and layer)
    result = inferencer.inference_from_dicts(dicts=basic_texts)
    assert result[0]["context"] == basic_texts[0]["text"]
    assert result[0]["vec"][0] - 0.00527727306941057 < 1e-6
    assert result[0]["vec"][-2] - 0.06285100416478565 < 1e-6
Example #4
def load_qna_model(model_dir):
    # Use the GPU when one is available, otherwise fall back to the CPU
    nlp = Inferencer.load(model_dir,
                          task_type="question_answering",
                          gpu=torch.cuda.is_available())
    nlp.save(model_dir)
    return nlp
Example #5
def embedding_extraction():
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO)

    ##########################
    ########## Settings
    ##########################
    set_all_seeds(seed=42)
    # load from a local path:
    # lang_model = Path("../saved_models/glove-german-uncased")
    # or by name from the model hub (hosted on S3)
    lang_model = "glove-german-uncased"  # only GloVe, word2vec or converted fastText (fixed vocab) embeddings are supported
    do_lower_case = True
    use_amp = None
    device, n_gpu = initialize_device_settings(use_cuda=True, use_amp=use_amp)

    # Create an InferenceProcessor
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=do_lower_case)
    processor = InferenceProcessor(tokenizer=tokenizer, max_seq_len=128)

    # Create an AdaptiveModel
    language_model = LanguageModel.load(lang_model)
    model = AdaptiveModel(language_model=language_model,
                          prediction_heads=[],
                          embeds_dropout_prob=0.1,
                          lm_output_types=["per_sequence"],
                          device=device)

    # Create Inferencer for embedding extraction
    inferencer = Inferencer(model=model,
                            processor=processor,
                            task_type="embeddings")

    # Extract vectors
    basic_texts = [
        {
            "text":
            "Schartau sagte dem Tagesspiegel, dass Fischer ein Idiot sei"
        },
        {
            "text": "Martin Müller spielt Handball in Berlin"
        },
    ]

    result = inferencer.extract_vectors(dicts=basic_texts,
                                        extraction_strategy="cls_token",
                                        extraction_layer=-1)
    print(result)
    inferencer.close_multiprocessing_pool()
Example #6
def index():
    inp1 = {}
    inp2 = {}
    inp3 = {}

    if request.method == 'POST':
        cog_labels = {}
        emo_labels = {}

        cognitive_inferencer = Inferencer.load(model_dir_cog)
        emotional_inferencer = Inferencer.load(model_dir_emo)

        inp1['text'] = request.form['strengths']
        inp2['text'] = request.form['weaknesses']
        inp3['text'] = request.form['suggestions']

        cog_labels['strength'] = inference_cognitive(cognitive_inferencer,
                                                     [inp1])
        emo_labels['strength'] = inference_emotional(emotional_inferencer,
                                                     [inp1])

        cog_labels['weakness'] = inference_cognitive(cognitive_inferencer,
                                                     [inp2])
        emo_labels['weakness'] = inference_emotional(emotional_inferencer,
                                                     [inp2])

        cog_labels['suggestion'] = inference_cognitive(cognitive_inferencer,
                                                       [inp3])
        emo_labels['suggestion'] = inference_emotional(emotional_inferencer,
                                                       [inp3])

        percentage_score = calculate_empathy_score(emo_labels, cog_labels)

        feedback = get_feedback(percentage_score)

        del cognitive_inferencer
        del emotional_inferencer

        # return redirect(url_for('index', perc_score=percentage_score, feedback=feedback, emo_labels=emo_labels, cog_labels=cog_labels))
        return render_template(
            'index.html',
            perc_score=percentage_score,
            feedback=feedback,
            emo_labels=emo_labels,
            cog_labels=cog_labels,
            emo_feedback=get_emotional_feedback(),
            cog_feedback=get_cognitive_feedback(),
        )
    return render_template('index.html', onclick="popup.html")
Example #7
def score(task):
    task_type = cu.tasks.get(str(task)).get('type')
    if task_type in ('classification', 'multi_classification'):
        _dt = dt.Data(task=task, inference=True)
        return Inferencer.load(_dt.get_path('model_dir'))
    elif task_type == 'ner':
        return ner.NER(task=task, inference=True)
    elif task_type == 'qa':
        return rank.Rank(task=task, inference=True)
    else:
        logger.warning('TASK TYPE NOT SUPPORTED')
        return None
Example #8
def test_embeddings_extraction(num_processes):
    # Input
    basic_texts = [
        {
            "text":
            "Schartau sagte dem Tagesspiegel, dass Fischer ein Idiot ist"
        },
        {
            "text": "Martin Müller spielt Fussball"
        },
    ]

    # Load model, tokenizer and processor directly into Inferencer
    model = Inferencer.load(model_name_or_path="bert-base-german-cased",
                            task_type="embeddings",
                            gpu=False,
                            batch_size=5,
                            extraction_strategy="reduce_mean",
                            extraction_layer=-2,
                            num_processes=num_processes)

    # Get embeddings for input text (you can vary the strategy and layer)
    result = model.inference_from_dicts(dicts=basic_texts)
    assert result[0]["context"] == [
        'Schar', '##tau', 'sagte', 'dem', 'Tages', '##spiegel', ',', 'dass',
        'Fischer', 'ein', 'Id', '##iot', 'ist'
    ]
    assert np.isclose(result[0]["vec"][0], 1.50174605e-02)
Example #9
def get_inferencer(model_name_or_path, batch_size, strategy, layer):
    # Load inferencers for the two models
    model = Inferencer.load(
        model_name_or_path,
        batch_size=batch_size,
        gpu=True,
        task_type="embeddings",
        extraction_strategy=strategy.value,
        extraction_layer=layer,
    )
    if os.path.exists(model_name_or_path):
        model.processor = InferenceProcessor.load_from_dir(model_name_or_path)
        from_model_hub = False
    else:  # Convert the processor into our custom InferenceProcessor to be able to load from file
        # This keeps casing (the default for saved Albert models) and is especially needed
        # to match the shape of the embeddings when using the per_token strategy.
        # keep_accents is True by default in FARM.
        tokenizer = Tokenizer.load(
            model_name_or_path,
            do_lower_case=False,
            max_len=512,
        )
        model.processor = InferenceProcessor(tokenizer,
                                             model.processor.max_seq_len)
        from_model_hub = True
    return model, from_model_hub
Example #10
def convert_from_transformers():
    # CASE 1: MODEL
    # Load model from transformers model hub (-> continue training / compare models / ...)
    model = AdaptiveModel.convert_from_transformers(
        "deepset/bert-large-uncased-whole-word-masking-squad2",
        device="cpu",
        task_type="question_answering")
    # ... continue as in the other examples e.g. to fine-tune this QA model on your own data

    # CASE 2: INFERENCER
    # Load Inferencer from transformers, incl. model & tokenizer (-> just get predictions)
    nlp = Inferencer.load(
        "deepset/bert-large-uncased-whole-word-masking-squad2",
        task_type="question_answering")

    # run predictions
    QA_input = [{
        "questions": ["Why is model conversion important?"],
        "text":
        "The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks."
    }]
    result = nlp.inference_from_dicts(dicts=QA_input, rest_api_schema=True)
    pprint.pprint(result)

    # save it
    farm_model_dir = Path("../saved_models/bert-english-qa-large")
    nlp.save(farm_model_dir)
Example #11
    def post_init(self):
        """Load FARM-based text model"""
        from farm.infer import Inferencer
        self.model = Inferencer.load(
            model_name_or_path=self.pretrained_model_name_or_path,
            task_type='embeddings',
            num_processes=self.num_processes)
Example #12
    def __init__(self, retriever: EmbeddingRetriever):

        self.embedding_model = Inferencer.load(
            retriever.embedding_model,
            revision=retriever.model_version,
            task_type="embeddings",
            extraction_strategy=retriever.pooling_strategy,
            extraction_layer=retriever.emb_extraction_layer,
            gpu=retriever.use_gpu,
            batch_size=4,
            max_seq_len=512,
            num_processes=0)
        # Check that document_store has the right similarity function
        similarity = retriever.document_store.similarity
        # If we are using a sentence transformer model
        if "sentence" in retriever.embedding_model.lower(
        ) and similarity != "cosine":
            logger.warning(
                f"You seem to be using a Sentence Transformer with the {similarity} function. "
                f"We recommend using cosine instead. "
                f"This can be set when initializing the DocumentStore")
        elif "dpr" in retriever.embedding_model.lower(
        ) and similarity != "dot_product":
            logger.warning(
                f"You seem to be using a DPR model with the {similarity} function. "
                f"We recommend using dot_product instead. "
                f"This can be set when initializing the DocumentStore")
Example #13
def extract_embeddings(load_dir, use_gpu, batch_size):
    with open(load_dir / "s3e_stats.pkl", "rb") as f:
        s3e_stats = pickle.load(f)

    # Init inferencer
    inferencer = Inferencer.load(model_name_or_path=load_dir,
                                 task_type="embeddings",
                                 gpu=use_gpu,
                                 batch_size=batch_size,
                                 extraction_strategy="s3e",
                                 extraction_layer=-1,
                                 s3e_stats=s3e_stats)

    # Input
    basic_texts = [
        {
            "text": "a man is walking on the street."
        },
        {
            "text": "a woman is walking on the street."
        },
    ]

    # Get embeddings for input text
    result = inferencer.inference_from_dicts(dicts=basic_texts)
    print(result)
    inferencer.close_multiprocessing_pool()
Example #14
def inference_with_multiprocessing():
    """
    The Inferencers (Inferencer/QAInferencer) create a multiprocessing Pool during init if the num_processes argument
    is set greater than 0. This helps speed up the pre-processing that happens on the CPU before the model's forward
    pass on the GPU (or CPU).

    Having the pool at the Inferencer level allows re-use across multiple inference requests. However, it needs to be
    closed properly to ensure there are no memory leaks.

    For production environments, the Inferencer object can be wrapped in a try/finally block, as in this example, to
    ensure the Pool is closed even in the case of errors.
    """

    model = None
    try:
        model = Inferencer.load("deepset/roberta-base-squad2", batch_size=40, task_type="question_answering", gpu=True)
        QA_input = [
            {
                "qas": ["Who counted the game among the best ever made?"],
                "context": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created."
            }]
        result = model.inference_from_dicts(dicts=QA_input)[0]

        pprint.pprint(result)
    finally:
        # Skip closing when loading failed before the pool was created
        if model:
            model.close_multiprocessing_pool()
Example #15
def embeddings_extraction():
    set_all_seeds(seed=42)
    batch_size = 32
    use_gpu = False
    lang_model = "bert-base-german-cased"
    # or local path:
    # lang_model = Path("../saved_models/farm-bert-base-cased-squad2")

    # Input
    basic_texts = [
        {
            "text":
            "Schartau sagte dem Tagesspiegel, dass Fischer ein Idiot ist"
        },
        {
            "text": "Martin Müller spielt Fussball"
        },
    ]

    # Load model, tokenizer and processor directly into Inferencer
    model = Inferencer.load(lang_model,
                            task_type="embeddings",
                            gpu=use_gpu,
                            batch_size=batch_size)

    # Get embeddings for input text (you can vary the strategy and layer)
    result = model.extract_vectors(dicts=basic_texts,
                                   extraction_strategy="cls_token",
                                   extraction_layer=-1)
    print(result)
Example #16
def test_inferencer_with_fast_bert_tokenizer():
    model = Inferencer.load("bert-base-german-cased",
                            task_type='text_classification',
                            use_fast=True,
                            num_processes=0)
    tokenizer = model.processor.tokenizer
    assert type(tokenizer) is BertTokenizerFast
Example #17
def onnx_runtime_example():
    """
    This example shows conversion of a transformers model from the Model Hub to
    ONNX format & inference using ONNXRuntime.
    """

    model_name_or_path = "deepset/roberta-base-squad2"
    onnx_model_export_path = Path("./roberta-onnx")

    AdaptiveModel.convert_to_onnx(model_name_or_path,
                                  onnx_model_export_path,
                                  task_type="question_answering")

    # for ONNX models, the Inferencer uses ONNXRuntime under the hood
    inferencer = Inferencer.load(model_name_or_path=onnx_model_export_path)

    qa_input = [{
        "questions": ["Who counted the game among the best ever made?"],
        "text":
        "Twilight Princess was released to universal critical acclaim and commercial success. "
        "It received perfect scores from major publications such as 1UP.com, Computer and Video Games, "
        "Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators "
        "GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii "
        "version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called "
        "it one of the greatest games ever created.",
    }]

    results = inferencer.inference_from_dicts(qa_input)
    print(results)
    inferencer.close_multiprocessing_pool()
Example #18
def convert_from_transformers():
    transformers_input_name = "deepset/bert-base-german-cased-hatespeech-GermEval18Coarse"
    farm_output_dir = Path(
        "../saved_models/farm-bert-base-german-cased-hatespeech-GermEval18Coarse"
    )

    # CASE 1: MODEL
    # Load model from transformers model hub (-> continue training / compare models / ...)
    model = AdaptiveModel.convert_from_transformers(
        transformers_input_name, device="cpu", task_type="text_classification")
    # ... continue as in the other examples e.g. to fine-tune this model on your own data

    # CASE 2: INFERENCER
    # Load Inferencer from transformers, incl. model & tokenizer (-> just get predictions)
    nlp = Inferencer.load(transformers_input_name,
                          task_type="text_classification")

    # run predictions
    result = nlp.inference_from_dicts(dicts=[{
        "text": "Was ein scheiß Nazi!"
    }],
                                      rest_api_schema=True)
    pprint.pprint(result)

    # save it
    nlp.save(farm_output_dir)
Example #19
def adaptive_model_qa(use_gpu, num_processes):
    """
    PyTest Fixture for a Question Answering Inferencer based on PyTorch.
    """
    try:
        model = Inferencer.load(
            "deepset/bert-base-cased-squad2",
            task_type="question_answering",
            batch_size=16,
            num_processes=num_processes,
            gpu=use_gpu,
        )
        yield model
    finally:
        if num_processes != 0:
            # close the pool
            # we pass join=True to wait for all sub processes to close
            # this is because below we want to test if all sub-processes
            # have exited
            model.close_multiprocessing_pool(join=True)

    # check if all workers (sub processes) are closed
    current_process = psutil.Process()
    children = current_process.children()
    assert len(children) == 0
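
The fixture above only builds and tears down the Inferencer. A test consuming it might look like the following minimal sketch; the test name, the fixture registration via @pytest.fixture, and the QA input are illustrative assumptions, not part of the source.

# Hypothetical test using the fixture above (assumes `adaptive_model_qa` is registered as a pytest fixture)
def test_qa_prediction_schema(adaptive_model_qa):
    qa_input = [{
        "questions": ["Who counted the game among the best ever made?"],
        "text": "GameTrailers in their review called it one of the greatest games ever created."
    }]
    result = adaptive_model_qa.inference_from_dicts(dicts=qa_input)[0]
    # Same prediction schema as in the other QA examples in this listing
    assert result["predictions"][0]["answers"], "expected at least one answer candidate"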
Example #20
    def __init__(self, opt, userid=None):
        # initialize defaults first
        super().__init__(opt, userid)
        self.model_path = modelzoo_path(
            self.opt.get('datapath'), self.opt.get('init_model')
        )
        self.proxies = {}
        self.episode_done = False
        self.solr_docs = None
        self.text = None
        self.qaid = None
        self.question = None
        self.reply = None
        self.report_log = {"pquad_explorer": {}, "logs": []}
        self.id = self.__class__.__name__
        self.top_k_doc = 3
        if self.opt.get('top_k_doc'):
            self.top_k_doc = self.opt.get('top_k_doc')
        self.batch_size = 4
        if self.opt.get('batch_size'):
            self.batch_size = self.opt.get('batch_size')
        self.top_k_candidates = 3
        if self.opt.get('top_k_candidates'):
            self.top_k_candidates = self.opt.get('top_k_candidates')

        self.model = Inferencer.load(self.model_path, batch_size=self.batch_size)
        logging.getLogger('farm.data_handler.processor').setLevel(logging.ERROR)
        logging.getLogger('farm.infer').setLevel(logging.ERROR)
        
        self.report_log = {"pquad_explorer": {}, "logs": []}
        # if user id is null, set a new one
        if userid is None:
            self.userid = str(uuid.uuid4())
Example #21
def onnx_runtime_example():
    """
    This example converts a Question Answering FARM AdaptiveModel
    to ONNX format and uses ONNX Runtime for inference.
    """

    device = "cpu"
    model_name_or_path = "deepset/bert-base-cased-squad2"
    onnx_model_export_path = Path("./onnx-export")

    model = AdaptiveModel.convert_from_transformers(model_name_or_path, device=device, task_type="question_answering")
    model.convert_to_onnx(onnx_model_export_path)

    inferencer = Inferencer.load(model_name_or_path=onnx_model_export_path)

    qa_input = [
        {
            "qas": ["Who counted the game among the best ever made?"],
            "context": "Twilight Princess was released to universal critical acclaim and commercial success. "
            "It received perfect scores from major publications such as 1UP.com, Computer and Video Games, "
            "Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators "
            "GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii "
            "version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called "
            "it one of the greatest games ever created.",
        }
    ]

    results = inferencer.inference_from_dicts(qa_input)
    print(results)
Example #22
    def __init__(self,
                 model_name_or_path,
                 context_window_size=30,
                 no_ans_threshold=-100,
                 batch_size=16,
                 use_gpu=True,
                 n_candidates_per_passage=2):
        """
        :param model_name_or_path: directory of a saved model or the name of a public model:
                                   - 'bert-base-cased'
                                   - 'deepset/bert-base-cased-squad2'
                                   - 'distilbert-base-uncased-distilled-squad'
                                   ....
                                   See https://huggingface.co/models for full list of available models.
        :param context_window_size: The size, in characters, of the window around the answer span that is used when displaying the context around the answer.
        :param no_ans_threshold: How much greater the no_answer logit needs to be over the pos_answer in order to be chosen.
                                 The higher the value, the more `uncertain` answers are accepted
        :param batch_size: Number of samples the model receives in one batch for inference
        :param use_gpu: Whether to use GPU (if available)
        :param n_candidates_per_passage: How many candidate answers are extracted per text sequence that the model can process at once (depends on `max_seq_len`).
                                         Note: This is not the number of "final answers" you will receive
                                         (see `top_k` in FARMReader.predict() or Finder.get_answers() for that)
        """

        self.inferencer = Inferencer.load(model_name_or_path,
                                          batch_size=batch_size,
                                          gpu=use_gpu,
                                          task_type="question_answering")
        self.inferencer.model.prediction_heads[
            0].context_window_size = context_window_size
        self.inferencer.model.prediction_heads[
            0].no_ans_threshold = no_ans_threshold
        self.inferencer.model.prediction_heads[
            0].n_best = n_candidates_per_passage
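
Only the constructor of the wrapping reader class is shown above. As a minimal standalone sketch (not part of the original class), the same setup can be reproduced with a plain Inferencer.load call, using the default values from the docstring:

# Standalone sketch equivalent to the __init__ above, with the docstring's default values
from farm.infer import Inferencer

inferencer = Inferencer.load("deepset/bert-base-cased-squad2",
                             batch_size=16,
                             gpu=False,
                             task_type="question_answering")
head = inferencer.model.prediction_heads[0]
head.context_window_size = 30   # characters of context shown around the answer span
head.no_ans_threshold = -100    # how much larger the no_answer logit must be to win
head.n_best = 2                 # n_candidates_per_passage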
Example #23
def test_load_extract_s3e_embeddings():
    load_dir = Path("samples/s3e/fitted_s3e")
    use_gpu = False
    batch_size = 2

    with open(load_dir / "s3e_stats.pkl", "rb") as f:
        s3e_stats = pickle.load(f)

    # Init inferencer
    inferencer = Inferencer.load(model_name_or_path=load_dir,
                                 task_type="embeddings",
                                 gpu=use_gpu,
                                 batch_size=batch_size,
                                 extraction_strategy="s3e",
                                 extraction_layer=-1,
                                 s3e_stats=s3e_stats,
                                 num_processes=0)

    # Input
    basic_texts = [
        {
            "text": "a man is walking on the street."
        },
        {
            "text": "a woman is walking on the street."
        },
    ]

    # Get embeddings for input text
    result = inferencer.inference_from_dicts(dicts=basic_texts)
    assert result[0]["context"] == [
        'a', 'man', 'is', 'walking', 'on', 'the', 'street', '.'
    ]
    assert result[0]["vec"][0] - 0.00527727306941057 < 1e-6
    assert result[0]["vec"][-2] + 0.06285100416478565 < 1e-6
Example #24
    def __init__(self,
                 document_store,
                 embedding_model=None,
                 gpu=True,
                 model_format="farm"):
        """
        TODO
        :param document_store:
        :param embedding_model:
        :param gpu:
        :param model_format:
        """
        self.document_store = document_store
        self.model_format = model_format
        self.embedding_model = None
        # only needed if you want to retrieve via cosine similarity of embeddings
        if embedding_model:
            logger.info(
                f"Init retriever using embeddings of model {embedding_model}")
            if model_format == "farm" or model_format == "transformers":
                self.embedding_model = Inferencer.load(embedding_model,
                                                       task_type="embeddings",
                                                       gpu=gpu,
                                                       batch_size=4,
                                                       max_seq_len=512)

            elif model_format == "sentence_transformers":
                from sentence_transformers import SentenceTransformer
                # pretrained embedding models coming from: https://github.com/UKPLab/sentence-transformers#pretrained-models
                # e.g. 'roberta-base-nli-stsb-mean-tokens'
                self.embedding_model = SentenceTransformer(embedding_model)
            else:
                raise NotImplementedError
Example #25
def test_revision_default(caplog=None):
    # default model should be the same as v2
    model = Inferencer.load("deepset/roberta-base-squad2",
                            task_type="question_answering")
    assert torch.isclose(
        torch.sum(model.model.language_model.model.encoder.layer[0].
                  intermediate.dense.weight),
        torch.sum(torch.tensor([-21411.4414])))
    del model
Example #26
def test_revision_v1(caplog=None):
    model = Inferencer.load("deepset/roberta-base-squad2",
                            revision="v1.0",
                            task_type="question_answering")
    assert torch.isclose(
        torch.sum(model.model.language_model.model.encoder.layer[0].
                  intermediate.dense.weight),
        torch.sum(torch.tensor([-21394.6055])))
    del model
Example #27
    def __init__(
        self,
        document_store: BaseDocumentStore,
        embedding_model: str,
        use_gpu: bool = True,
        model_format: str = "farm",
        pooling_strategy: str = "reduce_mean",
        emb_extraction_layer: int = -1,
    ):
        """
        :param document_store: An instance of DocumentStore from which to retrieve documents.
        :param embedding_model: Local path or name of model in Hugging Face's model hub such as ``'deepset/sentence_bert'``
        :param use_gpu: Whether to use gpu or not
        :param model_format: Name of framework that was used for saving the model. Options:

                             - ``'farm'``
                             - ``'transformers'``
                             - ``'sentence_transformers'``
        :param pooling_strategy: Strategy for combining the embeddings from the model (for farm / transformers models only).
                                 Options:

                                 - ``'cls_token'`` (sentence vector)
                                 - ``'reduce_mean'`` (sentence vector)
                                 - ``'reduce_max'`` (sentence vector)
                                 - ``'per_token'`` (individual token vectors)
        :param emb_extraction_layer: Layer from which the embeddings are extracted (for farm / transformers models only).
                                     Default: -1 (the very last layer).
        """
        self.document_store = document_store
        self.model_format = model_format
        self.pooling_strategy = pooling_strategy
        self.emb_extraction_layer = emb_extraction_layer

        logger.info(f"Init retriever using embeddings of model {embedding_model}")
        if model_format == "farm" or model_format == "transformers":
            self.embedding_model = Inferencer.load(
                embedding_model, task_type="embeddings", extraction_strategy=self.pooling_strategy,
                extraction_layer=self.emb_extraction_layer, gpu=use_gpu, batch_size=4, max_seq_len=512, num_processes=0
            )

        elif model_format == "sentence_transformers":
            try:
                from sentence_transformers import SentenceTransformer
            except ImportError:
                raise ImportError("Can't find package `sentence-transformers` \n"
                                  "You can install it via `pip install sentence-transformers` \n"
                                  "For details see https://github.com/UKPLab/sentence-transformers ")
            # pretrained embedding models coming from: https://github.com/UKPLab/sentence-transformers#pretrained-models
            # e.g. 'roberta-base-nli-stsb-mean-tokens'
            if use_gpu:
                device = "cuda"
            else:
                device = "cpu"
            self.embedding_model = SentenceTransformer(embedding_model, device=device)
        else:
            raise NotImplementedError
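
For the "farm" / "transformers" branch this boils down to a plain Inferencer.load call. A minimal standalone sketch follows; the model name is the one suggested in the docstring, and the final print is illustrative only.

# Standalone sketch of the "farm" branch above: pooled sentence embeddings via the Inferencer
from farm.infer import Inferencer

embedding_model = Inferencer.load("deepset/sentence_bert",
                                  task_type="embeddings",
                                  extraction_strategy="reduce_mean",  # pooling_strategy
                                  extraction_layer=-1,                # emb_extraction_layer
                                  gpu=False,
                                  batch_size=4,
                                  max_seq_len=512,
                                  num_processes=0)
result = embedding_model.inference_from_dicts(
    dicts=[{"text": "a man is walking on the street."}])
print(len(result[0]["vec"]))  # dimensionality of the pooled sentence vector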
Example #28
def test_wrong_revision(caplog=None):
    # We want this load attempt to fail because we specify an invalid revision
    failed_load = None
    try:
        failed_load = Inferencer.load("deepset/roberta-base-squad2",
                                      revision="xxx",
                                      task_type="question_answering")
    except Exception:
        pass
    assert not failed_load
Example #29
def embeddings_extraction():
    ##########################
    ########## Settings
    ##########################
    set_all_seeds(seed=42)
    batch_size = 32
    use_gpu = True
    device, n_gpu = initialize_device_settings(use_cuda=use_gpu)
    lang_model = "bert-base-german-cased"

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=False)

    # 2. Create a lightweight Processor only for inference (no labels, minimal preprocessing)
    processor = InferenceProcessor(tokenizer=tokenizer, max_seq_len=128)

    # 3. Create an AdaptiveModel with a pretrained language model as a basis
    language_model = LanguageModel.load(lang_model)

    adaptive_model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[],
        embeds_dropout_prob=0,
        lm_output_types=["per_token", "per_sequence"],
        device=device,
    )

    # 4. Extract embeddings with the model in inference mode
    basic_texts = [
        {
            "text":
            "Schartau sagte dem Tagesspiegel, dass Fischer ein Idiot ist"
        },
        {
            "text": "Martin Müller spielt Fussball"
        },
    ]

    model = Inferencer(adaptive_model, processor, gpu=use_gpu)
    result = model.extract_vectors(dicts=basic_texts)
    print(result)
Example #30
def adaptive_model_qa():
    # download the model from S3
    s3_resource = boto3.resource("s3")
    bucket = s3_resource.Bucket("deepset.ai-farm-models")
    prefix = "0.3.0/bert-english-qa-large/"
    for obj in bucket.objects.filter(Prefix=prefix):
        if not os.path.exists(os.path.dirname(obj.key)):
            os.makedirs(os.path.dirname(obj.key))
        bucket.download_file(obj.key, obj.key)
    model = Inferencer.load(prefix, batch_size=16)
    return model