Example 1
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline


def transformer_models(model_name):
    # Load a QA-fine-tuned model plus its tokenizer and wrap them
    # in a ready-to-use question-answering pipeline.
    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    qa_pipeline = pipeline('question-answering',
                           model=model,
                           tokenizer=tokenizer)
    return qa_pipeline
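
A minimal usage sketch (the checkpoint name is illustrative; any QA-fine-tuned model works):

qa = transformer_models("deepset/bert-base-cased-squad2")
result = qa(question="Who wrote the report?", context="The report was written by Ada Lovelace.")
print(result["answer"], result["score"])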
Example 2
import torch
import torch.nn as nn
from transformers import AutoConfig, AutoModelForQuestionAnswering


class QAModel(nn.Module):  # illustrative class name; the listing shows only the constructor
    def __init__(self, CONFIG):
        super().__init__()
        self.CONFIG = CONFIG
        self.modelpath = "ahotrod/roberta_large_squad2"

        self.robertaconfig = AutoConfig.from_pretrained(self.modelpath)
        self.robertaconfig.output_hidden_states = True

        # Keep only the RoBERTa encoder from the fine-tuned QA checkpoint
        # (equivalent to accessing the model's `.roberta` attribute).
        self.roberta = AutoModelForQuestionAnswering.from_pretrained(
            self.modelpath,
            config=self.robertaconfig,
        )._modules['roberta']

        self.drop_out = nn.Dropout(p=self.CONFIG['DROPOUT'])
        self.high_dropout = nn.Dropout(p=self.CONFIG['HIGH_DROPOUT'])

        # Learnable per-layer pooling weights over all num_hidden_layers + 1
        # hidden states (embeddings plus each transformer layer). The first
        # MAIN_LAYERS entries start at -3, which down-weights them if the
        # weights are later normalised with a softmax (not shown here).
        n_weights = self.robertaconfig.num_hidden_layers + 1
        weights_init = torch.zeros(n_weights).float()
        weights_init.data[:self.CONFIG['MAIN_LAYERS']] = -3
        self.layer_weights = torch.nn.Parameter(weights_init)

        if self.CONFIG['QA_TASK']:
            if not self.CONFIG['ADD_TOKEN_LOSS']:
                self.classifier = nn.Linear(self.robertaconfig.hidden_size, 2)
            else:
                self.classifier = nn.Linear(self.robertaconfig.hidden_size, 3)
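
The listing shows only the constructor. A minimal sketch of a matching forward pass, assuming `layer_weights` is meant for softmax-weighted pooling of the hidden states and that the two-output QA head is active (neither is shown in the original):

    def forward(self, input_ids, attention_mask):
        # With output_hidden_states=True, the encoder also returns the tuple
        # of all hidden states (embeddings + one per layer).
        outputs = self.roberta(input_ids, attention_mask=attention_mask)
        hidden_states = outputs[2]  # num_hidden_layers + 1 tensors
        stacked = torch.stack(hidden_states, dim=0)
        # Softmax over the learned per-layer weights, then a weighted sum.
        weights = torch.softmax(self.layer_weights, dim=0)
        pooled = (weights.view(-1, 1, 1, 1) * stacked).sum(dim=0)
        logits = self.classifier(self.drop_out(pooled))
        start_logits, end_logits = logits.split(1, dim=-1)
        return start_logits.squeeze(-1), end_logits.squeeze(-1)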
Example 3
class QAModel(nn.Module):  # illustrative wrapper; imports as in Example 2
    def __init__(self, CONFIG):
        super().__init__()
        self.CONFIG = CONFIG
        self.modelpath = "ahotrod/roberta_large_squad2"

        self.robertaconfig = AutoConfig.from_pretrained(self.modelpath)
        self.robertaconfig.output_hidden_states = True

        # Load the QA checkpoint once, then reuse both its RoBERTa encoder
        # and its `qa_outputs` span-prediction head.
        pretrained = AutoModelForQuestionAnswering.from_pretrained(
            self.modelpath,
            config=self.robertaconfig,
        )
        self.roberta = pretrained._modules['roberta']

        self.dropout = nn.Dropout(p=self.CONFIG['DROPOUT'])
        self.l1 = pretrained._modules['qa_outputs']
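
Because `qa_outputs` is taken from the same SQuAD2 checkpoint as the encoder, the span-prediction head stays aligned with the representations it was fine-tuned on.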
Example 4
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline


class QAService:  # illustrative class name; the listing shows only the constructor
    def __init__(self):
        # Model and tokenizer are loaded from local mount paths.
        model = AutoModelForQuestionAnswering.from_pretrained("/model/model")
        tokenizer = AutoTokenizer.from_pretrained("/model/tokenizer")
        self.default_response = "Perhaps the answer is 42."
        self.predictor = pipeline("question-answering",
                                  model=model,
                                  tokenizer=tokenizer)
        # The answering context is read once at start-up.
        with open("/mounts/bert_context/paragraph.txt") as f:
            self.context = f.read()
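
A hedged sketch of how such a handler might answer a request (the method name and the confidence threshold are illustrative, not from the source):

    def predict(self, question):
        result = self.predictor(question=question, context=self.context)
        # The QA pipeline returns 'answer', 'score', 'start', and 'end';
        # fall back to the default response on low-confidence answers
        # (the 0.3 threshold is an illustrative choice).
        if result["score"] < 0.3:
            return self.default_response
        return result["answer"]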
Example 5
from transformers import AutoModelForQuestionAnswering
# `Converter` comes from FARM's conversion utilities (assumed import path;
# adjust to your FARM version).
from farm.conversion.transformers import Converter


def test_conversion_adaptive_model_qa():
    farm_model = Converter.convert_from_transformers(
        "deepset/bert-base-cased-squad2", device="cpu")
    transformer_model = farm_model.convert_to_transformers()[0]
    transformer_model2 = AutoModelForQuestionAnswering.from_pretrained(
        "deepset/bert-base-cased-squad2")
    # compare weights
    for p1, p2 in zip(transformer_model.parameters(),
                      transformer_model2.parameters()):
        assert (p1.data.ne(p2.data).sum() == 0)
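
Note: `p1.data.ne(p2.data).sum() == 0` is equivalent to `torch.equal(p1.data, p2.data)`, which states the intent more directly.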
Example 6
    def convert_to_transformers(self):
        # Appears to be a method of FARM's AdaptiveModel: converts the FARM
        # model (language model + prediction head) into a Hugging Face model.
        if len(self.prediction_heads) != 1:
            raise ValueError(
                f"Currently conversion only works for models with a SINGLE prediction head. "
                f"Your model has {len(self.prediction_heads)}")

        # TODO: add more info to the config

        if self.prediction_heads[0].model_type == "span_classification":
            # init model
            transformers_model = AutoModelForQuestionAnswering.from_config(
                self.language_model.model.config)
            # transfer weights for language model + prediction head
            setattr(transformers_model, transformers_model.base_model_prefix,
                    self.language_model.model)
            transformers_model.qa_outputs.load_state_dict(
                self.prediction_heads[0].feed_forward.feed_forward[0].
                state_dict())

        elif self.prediction_heads[0].model_type == "text_classification":
            # add more info to config
            self.language_model.model.config.id2label = {
                id: label
                for id, label in enumerate(self.prediction_heads[0].label_list)
            }
            self.language_model.model.config.label2id = {
                label: id
                for id, label in enumerate(self.prediction_heads[0].label_list)
            }
            self.language_model.model.config.finetuning_task = "text_classification"
            self.language_model.model.config.language = self.language_model.language

            # init model
            transformers_model = AutoModelForSequenceClassification.from_config(
                self.language_model.model.config)
            # transfer weights for language model + prediction head
            setattr(transformers_model, transformers_model.base_model_prefix,
                    self.language_model.model)
            transformers_model.classifier.load_state_dict(
                self.prediction_heads[0].feed_forward.feed_forward[0].
                state_dict())

        else:
            raise NotImplementedError(
                f"FARM -> Transformers conversion is not supported yet for"
                f" prediction heads of type {self.prediction_heads[0].model_type}"
            )

        return transformers_model
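
Once converted, the result is an ordinary Hugging Face model; a minimal usage sketch (`adaptive_model` stands in for a FARM AdaptiveModel instance):

hf_model = adaptive_model.convert_to_transformers()
hf_model.save_pretrained("converted_model")
reloaded = AutoModelForQuestionAnswering.from_pretrained("converted_model")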
Example 7
import logging

from farm.modeling.adaptive_model import AdaptiveModel
from transformers import AutoModelForQuestionAnswering


def test_conversion_adaptive_model(caplog):
    if caplog:
        caplog.set_level(logging.CRITICAL)

    model = AdaptiveModel.convert_from_transformers(
        "deepset/bert-base-cased-squad2",
        device="cpu",
        task_type="question_answering")
    transformer_model = model.convert_to_transformers()
    transformer_model2 = AutoModelForQuestionAnswering.from_pretrained(
        "deepset/bert-base-cased-squad2")
    # compare weights
    for p1, p2 in zip(transformer_model.parameters(),
                      transformer_model2.parameters()):
        assert (p1.data.ne(p2.data).sum() == 0)
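
Note that this `convert_to_transformers()` returns the model directly, whereas the `Converter` variant in Example 5 returns a list and needs the `[0]` index.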
Example 8
    def convert_to_transformers(self):
        if len(self.prediction_heads) != 1:
            raise ValueError(
                f"Currently conversion only works for models with a SINGLE prediction head. "
                f"Your model has {len(self.prediction_heads)}")
        elif len(self.prediction_heads[0].layer_dims) != 2:
            raise ValueError(
                f"Currently conversion only works for PredictionHeads that are a single-layer "
                f"feed-forward NN with dimensions [LM_output_dim, number_classes].\n"
                f"Your PredictionHead has {str(self.prediction_heads[0].layer_dims)} dimensions."
            )
        # TODO: add more info to the config

        if self.prediction_heads[0].model_type == "span_classification":
            # init model
            transformers_model = AutoModelForQuestionAnswering.from_config(
                self.language_model.model.config)
            # transfer weights for language model + prediction head
            setattr(transformers_model, transformers_model.base_model_prefix,
                    self.language_model.model)
            transformers_model.qa_outputs.load_state_dict(
                self.prediction_heads[0].feed_forward.feed_forward[0].
                state_dict())

        elif self.prediction_heads[0].model_type == "language_modelling":
            # init model
            transformers_model = AutoModelWithLMHead.from_config(
                self.language_model.model.config)
            # transfer weights for language model + prediction head
            setattr(transformers_model, transformers_model.base_model_prefix,
                    self.language_model.model)
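            # FARM's LM head keeps dense/LayerNorm at the top level, while
            # transformers' BertLMPredictionHead nests them under `transform.`,
            # so the state-dict keys are remapped before loading.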
            ph_state_dict = self.prediction_heads[0].state_dict()
            ph_state_dict["transform.dense.weight"] = ph_state_dict.pop(
                "dense.weight")
            ph_state_dict["transform.dense.bias"] = ph_state_dict.pop(
                "dense.bias")
            ph_state_dict["transform.LayerNorm.weight"] = ph_state_dict.pop(
                "LayerNorm.weight")
            ph_state_dict["transform.LayerNorm.bias"] = ph_state_dict.pop(
                "LayerNorm.bias")
            transformers_model.cls.predictions.load_state_dict(ph_state_dict)
            logger.warning(
                "Currently only the Masked Language Modeling component of the prediction head is converted, "
                "not the Next Sentence Prediction or Sentence Order Prediction components"
            )

        elif self.prediction_heads[0].model_type == "text_classification":
            if self.language_model.model.base_model_prefix == "roberta":
                # Classification heads in transformers differ across language-model variants.
                # RobertaClassificationHead has the components: input2dense, dropout, tanh, dense2output.
                # The tanh activation cannot be mapped to the current FARM-style linear
                # feed-forward ClassificationHeads, so conversion cannot work here; a
                # compatible FARM RobertaClassificationHead would be needed.
                logger.error(
                    "Conversion for Text Classification with Roberta or XLMRoberta is not possible at the moment."
                )
                raise NotImplementedError

            # add more info to config
            self.language_model.model.config.id2label = {
                id: label
                for id, label in enumerate(self.prediction_heads[0].label_list)
            }
            self.language_model.model.config.label2id = {
                label: id
                for id, label in enumerate(self.prediction_heads[0].label_list)
            }
            self.language_model.model.config.finetuning_task = "text_classification"
            self.language_model.model.config.language = self.language_model.language
            self.language_model.model.config.num_labels = self.prediction_heads[
                0].num_labels

            # init model
            transformers_model = AutoModelForSequenceClassification.from_config(
                self.language_model.model.config)
            # transfer weights for language model + prediction head
            setattr(transformers_model, transformers_model.base_model_prefix,
                    self.language_model.model)
            transformers_model.classifier.load_state_dict(
                self.prediction_heads[0].feed_forward.feed_forward[0].
                state_dict())
        elif self.prediction_heads[0].model_type == "token_classification":
            # add more info to config
            self.language_model.model.config.id2label = {
                id: label
                for id, label in enumerate(self.prediction_heads[0].label_list)
            }
            self.language_model.model.config.label2id = {
                label: id
                for id, label in enumerate(self.prediction_heads[0].label_list)
            }
            self.language_model.model.config.finetuning_task = "token_classification"
            self.language_model.model.config.language = self.language_model.language
            self.language_model.model.config.num_labels = self.prediction_heads[
                0].num_labels

            # init model
            transformers_model = AutoModelForTokenClassification.from_config(
                self.language_model.model.config)
            # transfer weights for language model + prediction head
            setattr(transformers_model, transformers_model.base_model_prefix,
                    self.language_model.model)
            transformers_model.classifier.load_state_dict(
                self.prediction_heads[0].feed_forward.feed_forward[0].
                state_dict())
        else:
            raise NotImplementedError(
                f"FARM -> Transformers conversion is not supported yet for"
                f" prediction heads of type {self.prediction_heads[0].model_type}"
            )

        return transformers_model
Example 9
import argparse
import os
import shutil

from transformers import AutoModelForQuestionAnswering, AutoTokenizer

parser = argparse.ArgumentParser()
parser.add_argument("--model")
args = parser.parse_args()

model = AutoModelForQuestionAnswering.from_pretrained(args.model)
tokenizer = AutoTokenizer.from_pretrained(args.model)

os.makedirs("model")
os.makedirs("tokenizer")

model.save_pretrained("model")
tokenizer.save_pretrained("tokenizer")

# Copy the model config next to the tokenizer files so the tokenizer
# directory can be loaded on its own (AutoTokenizer reads config.json
# to infer the tokenizer class when needed).
shutil.copyfile("model/config.json", "tokenizer/config.json")
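
Invoked from the command line, e.g. (the script name is illustrative):

python export_qa_model.py --model deepset/bert-base-cased-squad2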
Example 10
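A variant of the `convert_to_transformers` method from Example 8, without the `layer_dims` validation and the RoBERTa text-classification guard: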
    def convert_to_transformers(self):
        if len(self.prediction_heads) != 1:
            raise ValueError(
                f"Currently conversion only works for models with a SINGLE prediction head. "
                f"Your model has {len(self.prediction_heads)}")

        # TODO: add more info to the config

        if self.prediction_heads[0].model_type == "span_classification":
            # init model
            transformers_model = AutoModelForQuestionAnswering.from_config(
                self.language_model.model.config)
            # transfer weights for language model + prediction head
            setattr(transformers_model, transformers_model.base_model_prefix,
                    self.language_model.model)
            transformers_model.qa_outputs.load_state_dict(
                self.prediction_heads[0].feed_forward.feed_forward[0].
                state_dict())

        elif self.prediction_heads[0].model_type == "language_modelling":
            # init model
            transformers_model = AutoModelWithLMHead.from_config(
                self.language_model.model.config)
            # transfer weights for language model + prediction head
            setattr(transformers_model, transformers_model.base_model_prefix,
                    self.language_model.model)
            ph_state_dict = self.prediction_heads[0].state_dict()
            ph_state_dict["transform.dense.weight"] = ph_state_dict.pop(
                "dense.weight")
            ph_state_dict["transform.dense.bias"] = ph_state_dict.pop(
                "dense.bias")
            ph_state_dict["transform.LayerNorm.weight"] = ph_state_dict.pop(
                "LayerNorm.weight")
            ph_state_dict["transform.LayerNorm.bias"] = ph_state_dict.pop(
                "LayerNorm.bias")
            transformers_model.cls.predictions.load_state_dict(ph_state_dict)
            logger.warning(
                "Currently only the Masked Language Modeling component of the prediction head is converted, "
                "not the Next Sentence Prediction or Sentence Order Prediction components"
            )

        elif self.prediction_heads[0].model_type == "text_classification":
            # add more info to config
            self.language_model.model.config.id2label = {
                id: label
                for id, label in enumerate(self.prediction_heads[0].label_list)
            }
            self.language_model.model.config.label2id = {
                label: id
                for id, label in enumerate(self.prediction_heads[0].label_list)
            }
            self.language_model.model.config.finetuning_task = "text_classification"
            self.language_model.model.config.language = self.language_model.language
            self.language_model.model.config.num_labels = self.prediction_heads[
                0].num_labels

            # init model
            transformers_model = AutoModelForSequenceClassification.from_config(
                self.language_model.model.config)
            # transfer weights for language model + prediction head
            setattr(transformers_model, transformers_model.base_model_prefix,
                    self.language_model.model)
            transformers_model.classifier.load_state_dict(
                self.prediction_heads[0].feed_forward.feed_forward[0].
                state_dict())
        elif self.prediction_heads[0].model_type == "token_classification":
            # add more info to config
            self.language_model.model.config.id2label = {
                id: label
                for id, label in enumerate(self.prediction_heads[0].label_list)
            }
            self.language_model.model.config.label2id = {
                label: id
                for id, label in enumerate(self.prediction_heads[0].label_list)
            }
            self.language_model.model.config.finetuning_task = "token_classification"
            self.language_model.model.config.language = self.language_model.language
            self.language_model.model.config.num_labels = self.prediction_heads[
                0].num_labels

            # init model
            transformers_model = AutoModelForTokenClassification.from_config(
                self.language_model.model.config)
            # transfer weights for language model + prediction head
            setattr(transformers_model, transformers_model.base_model_prefix,
                    self.language_model.model)
            transformers_model.classifier.load_state_dict(
                self.prediction_heads[0].feed_forward.feed_forward[0].
                state_dict())
        else:
            raise NotImplementedError(
                f"FARM -> Transformers conversion is not supported yet for"
                f" prediction heads of type {self.prediction_heads[0].model_type}"
            )

        return transformers_model
Example 11
import time
import torch
import Bert_Embeddings as be
from flask import Flask, redirect, url_for, request, render_template, jsonify
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

app = Flask(__name__)

tokenizer = AutoTokenizer.from_pretrained(
    "bert-large-uncased-whole-word-masking-finetuned-squad")
model = AutoModelForQuestionAnswering.from_pretrained(
    "bert-large-uncased-whole-word-masking-finetuned-squad")

# Pass the objects loaded above to the pipeline instead of the checkpoint
# name, so the model is not instantiated a second time.
qa_pipeline = pipeline(
    "question-answering",
    model=model,
    tokenizer=tokenizer)

path = 'source_sample_tesla.txt'
corpus_embeddings, embedder, corpus, sentences, para = be.load_model(path)


def sim_sent(query):
    # Retrieve the corpus sentences most similar to the query, using
    # embeddings precomputed by the local Bert_Embeddings helper.
    start = time.time()
    queries = [query]
    results = be.ComputeSim(corpus_embeddings, embedder, queries)
    text = []
    for idx in results:
        text.append(corpus[idx])