Example #1
def test_conversion_adaptive_model_classification():
    farm_model = Converter.convert_from_transformers("deepset/bert-base-german-cased-hatespeech-GermEval18Coarse", device="cpu")
    transformer_model = farm_model.convert_to_transformers()[0]
    transformer_model2 = AutoModelForSequenceClassification.from_pretrained("deepset/bert-base-german-cased-hatespeech-GermEval18Coarse")
    # compare weights
    for p1, p2 in zip(transformer_model.parameters(), transformer_model2.parameters()):
        assert (p1.data.ne(p2.data).sum() == 0)
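These snippets are excerpts and omit their imports. They assume imports roughly like the following; the farm.* module paths reflect FARM's package layout and should be verified against the installed version:
import os
import pprint
from pathlib import Path

from transformers import (AutoModelForQuestionAnswering,
                          AutoModelForSequenceClassification,
                          AutoModelForTokenClassification,
                          AutoModelWithLMHead,
                          BertForSequenceClassification,
                          BertForTokenClassification,
                          pipeline)

# FARM imports (module paths are an assumption based on FARM's layout)
from farm.conversion.transformers import Converter
from farm.data_handler.processor import Processor
from farm.infer import Inferencer
from farm.modeling.adaptive_model import AdaptiveModel
from farm.modeling.language_model import LanguageModel
from farm.modeling.prediction_head import TextClassificationHead, TokenClassificationHead
from farm.modeling.tokenization import Tokenizer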
Example #2
def test_conversion_adaptive_model_ner():
    farm_model = Converter.convert_from_transformers("dslim/bert-base-NER", device="cpu")
    transformer_model = farm_model.convert_to_transformers()[0]
    transformer_model2 = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
    # compare weights
    for p1, p2 in zip(transformer_model.parameters(), transformer_model2.parameters()):
        assert (p1.data.ne(p2.data).sum() == 0)
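The ne/sum comparison used in these tests works, but torch.equal states the intent more directly. A minimal helper (hypothetical, not part of FARM or transformers) that also guards against differing parameter counts:
import torch

def assert_same_weights(model_a, model_b):
    # hypothetical helper: compare two models parameter by parameter
    params_a = list(model_a.parameters())
    params_b = list(model_b.parameters())
    assert len(params_a) == len(params_b), "parameter counts differ"
    for p1, p2 in zip(params_a, params_b):
        assert torch.equal(p1.data, p2.data)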
Example #3
def convert_to_transformers():
    farm_input_dir = Path(
        "../saved_models/farm-bert-base-german-cased-hatespeech-GermEval18Coarse"
    )
    transformers_output_dir = "../saved_models/bert-base-german-cased-hatespeech-GermEval18Coarse"
    # load from FARM format
    model = AdaptiveModel.load(farm_input_dir, device="cpu")
    processor = Processor.load_from_dir(farm_input_dir)
    model.connect_heads_with_processor(processor.tasks)

    # convert to transformers
    transformer_model = Converter.convert_to_transformers(model)[0]
    # Alternative way to convert to transformers:
    # transformer_model = model.convert_to_transformers()[0]

    # save it (note: transformers uses strings rather than Path objects)
    Path(transformers_output_dir).mkdir(parents=True, exist_ok=True)
    transformer_model.save_pretrained(transformers_output_dir)
    processor.tokenizer.save_pretrained(transformers_output_dir)

    # run predictions (using transformers)
    nlp = pipeline('sentiment-analysis',
                   model=str(transformers_output_dir),
                   tokenizer=str(transformers_output_dir))
    res = nlp("Was ein scheiß Nazi!")
    pprint.pprint(res)
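To confirm the export, the saved directory can be reloaded with the plain transformers API; a short sketch reusing the path above:
# Sketch: reload the exported checkpoint to verify the save
from transformers import AutoModelForSequenceClassification, AutoTokenizer

reloaded_model = AutoModelForSequenceClassification.from_pretrained(transformers_output_dir)
reloaded_tokenizer = AutoTokenizer.from_pretrained(transformers_output_dir)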
Example #4
def test_conversion_adaptive_model_lm():
    farm_model = Converter.convert_from_transformers("bert-base-german-cased", device="cpu")
    transformer_model = farm_model.convert_to_transformers()[0]
    transformer_model2 = AutoModelWithLMHead.from_pretrained("bert-base-german-cased")
    # compare weights
    for p1, p2 in zip(transformer_model.parameters(), transformer_model2.parameters()):
        assert (p1.data.ne(p2.data).sum() == 0)
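Note that recent transformers releases deprecate AutoModelWithLMHead. For a masked-LM checkpoint such as bert-base-german-cased, the modern equivalent is AutoModelForMaskedLM (assuming a sufficiently new transformers version):
from transformers import AutoModelForMaskedLM

# On newer transformers releases, AutoModelWithLMHead emits a
# deprecation warning; this is the masked-LM equivalent.
transformer_model2 = AutoModelForMaskedLM.from_pretrained("bert-base-german-cased")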
Example #5
def test_conversion_adaptive_model_qa():
    farm_model = Converter.convert_from_transformers("deepset/bert-base-cased-squad2", device="cpu")
    transformer_model = farm_model.convert_to_transformers()[0]
    transformer_model2 = AutoModelForQuestionAnswering.from_pretrained("deepset/bert-base-cased-squad2")
    # compare weights
    for p1, p2 in zip(transformer_model.parameters(), transformer_model2.parameters()):
        assert (p1.data.ne(p2.data).sum() == 0)
Example #6
def convert_from_transformers():
    transformers_input_name = "deepset/bert-base-german-cased-hatespeech-GermEval18Coarse"
    farm_output_dir = Path(
        "../saved_models/farm-bert-base-german-cased-hatespeech-GermEval18Coarse"
    )

    # CASE 1: MODEL
    # Load model from transformers model hub (-> continue training / compare models / ...)
    model = Converter.convert_from_transformers(transformers_input_name,
                                                device="cpu")

    # Alternative way to load from transformers model hub:
    # model = AdaptiveModel.convert_from_transformers(transformers_input_name, device="cpu", task_type="text_classification")
    # ... continue as in the other examples, e.g. to fine-tune this classification model on your own data

    # CASE 2: INFERENCER
    # Load Inferencer from transformers, incl. model & tokenizer (-> just get predictions)
    nlp = Inferencer.load(transformers_input_name,
                          task_type="text_classification")

    # run predictions
    result = nlp.inference_from_dicts(dicts=[{"text": "Was ein scheiß Nazi!"}])
    pprint.pprint(result)
    nlp.close_multiprocessing_pool()

    # save it
    nlp.save(farm_output_dir)
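A quick way to check the FARM-format save is to load the directory back, mirroring the load calls used elsewhere in these examples (a sketch reusing the path above):
# Sketch: verify the FARM-format save by loading it back
model_reloaded = AdaptiveModel.load(farm_output_dir, device="cpu")
processor_reloaded = Processor.load_from_dir(farm_output_dir)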
Example #7
def convert_to_transformers():
    farm_model_dir = Path("../saved_models/bert-english-qa-large")

    # load from FARM format
    model = AdaptiveModel.load(farm_model_dir, device="cpu")
    tokenizer = Tokenizer.load(farm_model_dir)

    # convert to transformers
    transformer_model = Converter.convert_to_transformers(model)[0]
    # Alternative way to convert to transformers:
    # transformer_model = model.convert_to_transformers()[0]

    # save it (Note: transformers uses strings rather than Path objects)
    model_dir = "../saved_models/bert-large-uncased-whole-word-masking-squad2"
    os.makedirs(model_dir, exist_ok=True)
    transformer_model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)

    # run predictions (using transformers)
    nlp = pipeline('question-answering', model=model_dir, tokenizer=model_dir)
    res = nlp({
        'question': 'Why is model conversion important?',
        'context': 'The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks.'
    })
    pprint.pprint(res)
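The question-answering pipeline also accepts keyword arguments instead of a dict; the call above is equivalent to:
# Equivalent keyword form of the QA pipeline call
res = nlp(question='Why is model conversion important?',
          context='The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks.')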
Example #8
def test_multiple_prediction_heads():
    model = "bert-base-german-cased"
    lm = LanguageModel.load(model)
    ph1 = TextClassificationHead(num_labels=3, label_list=["negative", "neutral", "positive"])
    ph2 = TokenClassificationHead(num_labels=3, label_list=["PER", "LOC", "ORG"])
    adaptive_model = AdaptiveModel(language_model=lm, prediction_heads=[ph1, ph2], embeds_dropout_prob=0.1,
                                   lm_output_types="per_token", device="cpu")
    transformer_models = Converter.convert_to_transformers(adaptive_model)
    assert isinstance(transformer_models[0], BertForSequenceClassification)
    assert isinstance(transformer_models[1], BertForTokenClassification)
    del lm
    del transformer_models
    del adaptive_model
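Converter.convert_to_transformers returns one transformers model per prediction head, and each can be saved as a standalone checkpoint. A sketch with hypothetical output paths:
# Sketch: persist each converted head as its own transformers model
# (output paths are hypothetical)
for i, tm in enumerate(transformer_models):
    tm.save_pretrained(f"saved_models/converted-head-{i}")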
Example #9
def test_conversion_inferencer_qa():
    # input
    question = "Why is model conversion important?"
    text = "The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks."

    # Load from model hub
    model = "deepset/bert-base-cased-squad2"
    nlp = Inferencer.load(model, task_type="question_answering", num_processes=0)

    assert nlp.processor.tokenizer.do_lower_case == False
    assert nlp.processor.tokenizer.is_fast == True

    QA_input = [{"questions": [question], "text": text}]
    result_farm = nlp.inference_from_dicts(dicts=QA_input)
    answer_farm = result_farm[0]["predictions"][0]["answers"][0]["answer"]
    assert answer_farm == 'gives freedom to the user'

    # save it
    farm_model_dir = Path("testsave/bert-conversion-test")
    nlp.save(farm_model_dir)

    # free RAM
    del nlp

    # load from disk in FARM format
    model = AdaptiveModel.load(farm_model_dir, device="cpu")
    tokenizer = Tokenizer.load(farm_model_dir)

    # convert to transformers
    transformer_model = Converter.convert_to_transformers(model)[0]

    # free RAM
    del model

    # save it (Note: transformers uses strings rather than Path objects)
    model_dir = "testsave/bert-conversion-test-hf"
    os.makedirs(model_dir, exist_ok=True)
    transformer_model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)
    del transformer_model
    del tokenizer

    # run predictions (using transformers)
    nlp = pipeline('question-answering', model=model_dir, tokenizer=model_dir)
    result_transformers = nlp({
        'question': question,
        'context': text
    })
    answer_transformers = result_transformers["answer"]
    assert answer_farm == answer_transformers
    del nlp
Example #10
def test_conversion_inferencer_ner():
    # input
    text = "Paris is a town in France."

    # Load from model hub
    model = "dslim/bert-base-NER"
    nlp = Inferencer.load(model, task_type="ner", num_processes=0)

    assert nlp.processor.tokenizer.do_lower_case == False
    assert nlp.processor.tokenizer.is_fast == True

    basic_texts = [{"text": text}]
    result_farm = nlp.inference_from_dicts(dicts=basic_texts)
    pred_farm = result_farm[0]["predictions"]
    assert pred_farm[0][0]["label"] == 'LOC'
    assert pred_farm[0][1]["label"] == 'LOC'
    assert len(pred_farm[0]) == 2

    # save it
    farm_model_dir = Path("testsave/bert-conversion-test-hf")
    nlp.save(farm_model_dir)
    del nlp

    # load from disk in FARM format
    model = AdaptiveModel.load(farm_model_dir, device="cpu")
    tokenizer = Tokenizer.load(farm_model_dir)

    # convert to transformers
    transformer_model = Converter.convert_to_transformers(model)[0]
    del model

    # save it (Note: transformers uses strings rather than Path objects)
    model_dir = "testsave/bert-conversion-test-hf"
    os.makedirs(model_dir, exist_ok=True)
    transformer_model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)
    del transformer_model
    del tokenizer

    # run predictions (using transformers)
    nlp = pipeline('ner', model=model_dir, tokenizer=model_dir)
    result_transformers = nlp(text)
    assert result_transformers[0]["entity"] == 'B-LOC'
    assert result_transformers[1]["entity"] == 'B-LOC'
    assert len(result_transformers) == 2
    del nlp
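The raw 'ner' pipeline emits one prediction per token with BIO tags. Newer transformers versions can also merge tokens into whole entity spans; depending on the installed release this looks like:
# Merge tokens into entity spans (transformers >= 4.6 uses
# aggregation_strategy; older releases used grouped_entities=True)
nlp = pipeline('ner', model=model_dir, tokenizer=model_dir,
               aggregation_strategy="simple")
print(nlp("Paris is a town in France."))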
Example #11
def test_conversion_inferencer_classification():
    # input
    text = "Das ist blöd."

    # Load from model hub
    model = "deepset/bert-base-german-cased-hatespeech-GermEval18Coarse"
    nlp = Inferencer.load(model,
                          task_type="text_classification",
                          num_processes=0)

    assert nlp.processor.tokenizer.do_lower_case == False
    assert nlp.processor.tokenizer.is_fast == True

    basic_texts = [{"text": text}]
    result_farm = nlp.inference_from_dicts(dicts=basic_texts)
    pred_farm = result_farm[0]["predictions"][0]["label"]
    assert pred_farm == 'OFFENSE'

    # save it
    farm_model_dir = Path("testsave/bert-conversion-test-hf")
    nlp.save(farm_model_dir)
    del nlp

    # load from disk in FARM format
    model = AdaptiveModel.load(farm_model_dir, device="cpu")
    tokenizer = Tokenizer.load(farm_model_dir)

    # convert to transformers
    transformer_model = Converter.convert_to_transformers(model)[0]
    del model

    # save it (Note: transformers uses strings rather than Path objects)
    model_dir = "testsave/bert-conversion-test-hf"
    os.makedirs(model_dir, exist_ok=True)
    transformer_model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)
    del transformer_model
    del tokenizer

    # run predictions (using transformers)
    nlp = pipeline('sentiment-analysis', model=model_dir, tokenizer=model_dir)
    result_transformers = nlp(text)
    pred_transformers = result_transformers[0]["label"]
    assert pred_farm == pred_transformers
    del nlp
Example #12
def convert_from_transformers():
    # CASE 1: MODEL
    # Load model from transformers model hub (-> continue training / compare models / ...)
    model = Converter.convert_from_transformers("deepset/bert-large-uncased-whole-word-masking-squad2", device="cpu")
    # Alternative way to load from transformers model hub:
    # model = AdaptiveModel.convert_from_transformers("deepset/bert-large-uncased-whole-word-masking-squad2", device="cpu", task_type="question_answering")
    # ... continue as in the other examples e.g. to fine-tune this QA model on your own data

    # CASE 2: INFERENCER
    # Load Inferencer from transformers, incl. model & tokenizer (-> just get predictions)
    nlp = Inferencer.load("deepset/bert-large-uncased-whole-word-masking-squad2", task_type="question_answering")

    # run predictions
    QA_input = [{"questions": ["Why is model conversion important?"],
                 "text": "The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks."}]
    result = nlp.inference_from_dicts(dicts=QA_input)
    pprint.pprint(result)
    nlp.close_multiprocessing_pool()

    # save it
    farm_model_dir = Path("../saved_models/bert-english-qa-large")
    nlp.save(farm_model_dir)
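Once saved, the FARM directory can be loaded back for another round of predictions or further fine-tuning; a minimal sketch reusing the path and input from above:
# Sketch: reload the saved FARM model and predict again
nlp = Inferencer.load(str(farm_model_dir), task_type="question_answering")
result = nlp.inference_from_dicts(dicts=QA_input)
pprint.pprint(result)
nlp.close_multiprocessing_pool()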