def transformer_models(model_name):
    """Build a question-answering pipeline for ``model_name``.

    The same identifier is passed to ``from_pretrained`` for both the model
    and the tokenizer, so the two always come from the same checkpoint.

    Args:
        model_name: Identifier handed to ``from_pretrained`` for the model
            and for the tokenizer.

    Returns:
        The object returned by ``pipeline('question-answering', ...)``.
    """
    # Keyword arguments are evaluated left to right: model first, then tokenizer.
    return pipeline(
        'question-answering',
        model=AutoModelForQuestionAnswering.from_pretrained(model_name),
        tokenizer=AutoTokenizer.from_pretrained(model_name),
    )
def __init__(self, CONFIG):
    """Set up the RoBERTa encoder, dropouts, per-layer weights and QA classifier.

    Args:
        CONFIG: Mapping read for the keys ``'DROPOUT'``, ``'HIGH_DROPOUT'``,
            ``'MAIN_LAYERS'`` and ``'QA_TASK'``; ``'ADD_TOKEN_LOSS'`` is read
            only when ``'QA_TASK'`` is truthy.
    """
    super().__init__()
    self.CONFIG = CONFIG
    self.modelpath = "ahotrod/roberta_large_squad2"

    # Ask the checkpoint to expose every hidden state, not just the last one.
    self.robertaconfig = AutoConfig.from_pretrained(self.modelpath)
    self.robertaconfig.output_hidden_states = True

    # Only the 'roberta' sub-module of the QA model is kept.
    qa_model = AutoModelForQuestionAnswering.from_pretrained(
        self.modelpath,
        config=self.robertaconfig,
    )
    self.roberta = qa_model._modules['roberta']

    self.drop_out = nn.Dropout(p=self.CONFIG['DROPOUT'])
    self.high_dropout = nn.Dropout(p=self.CONFIG['HIGH_DROPOUT'])

    # One learnable scalar per entry, num_hidden_layers + 1 in total
    # (presumably embeddings output plus each layer -- confirm in forward()).
    # The first MAIN_LAYERS entries start at -3, the remaining ones at 0.
    layer_count = self.robertaconfig.num_hidden_layers + 1
    initial_weights = torch.zeros(layer_count).float()
    initial_weights.data[:self.CONFIG['MAIN_LAYERS']] = -3
    self.layer_weights = torch.nn.Parameter(initial_weights)

    if self.CONFIG['QA_TASK']:
        # Two outputs by default; a third one when ADD_TOKEN_LOSS is enabled.
        n_outputs = 3 if self.CONFIG['ADD_TOKEN_LOSS'] else 2
        self.classifier = nn.Linear(self.robertaconfig.hidden_size, n_outputs)
def __init__(self, CONFIG):
    """Set up the RoBERTa encoder, a dropout layer and the pretrained QA head.

    The encoder (``'roberta'``) and the output layer (``'qa_outputs'``) are
    both taken from the same pretrained question-answering checkpoint.

    Args:
        CONFIG: Mapping read for the key ``'DROPOUT'``.
    """
    super().__init__()
    self.CONFIG = CONFIG
    self.modelpath = "ahotrod/roberta_large_squad2"

    # Ask the checkpoint to expose every hidden state, not just the last one.
    self.robertaconfig = AutoConfig.from_pretrained(self.modelpath)
    self.robertaconfig.output_hidden_states = True

    # Load the checkpoint once and pick both sub-modules from it; loading it
    # a second time only to extract 'qa_outputs' doubled start-up time and
    # peak memory without changing the resulting weights.
    qa_model = AutoModelForQuestionAnswering.from_pretrained(
        self.modelpath,
        config=self.robertaconfig,
    )

    # Assignment order is kept (roberta, dropout, l1) so that sub-module
    # registration order is unchanged.
    self.roberta = qa_model._modules['roberta']
    self.dropout = nn.Dropout(p=self.CONFIG['DROPOUT'])
    self.l1 = qa_model._modules['qa_outputs']
def __init__(self):
    """Load the QA model, tokenizer and context paragraph from fixed paths.

    Sets ``default_response``, ``predictor`` (a question-answering pipeline)
    and ``context`` (the full text of the mounted paragraph file).
    """
    qa_model = AutoModelForQuestionAnswering.from_pretrained("/model/model")
    qa_tokenizer = AutoTokenizer.from_pretrained("/model/tokenizer")

    self.default_response = "Perhaps the answer is 42."
    self.predictor = pipeline(
        "question-answering",
        model=qa_model,
        tokenizer=qa_tokenizer,
    )

    # The whole paragraph file is read once and kept in memory as the context.
    with open("/mounts/bert_context/paragraph.txt") as context_file:
        self.context = context_file.read()
def test_conversion_adaptive_model_qa():
    """Round-trip a QA checkpoint through FARM and compare all weights.

    The checkpoint is converted to a FARM model and back to a transformers
    model, then compared parameter by parameter against the same checkpoint
    loaded directly with ``AutoModelForQuestionAnswering``.
    """
    farm_model = Converter.convert_from_transformers(
        "deepset/bert-base-cased-squad2", device="cpu")
    transformer_model = farm_model.convert_to_transformers()[0]
    transformer_model2 = AutoModelForQuestionAnswering.from_pretrained(
        "deepset/bert-base-cased-squad2")

    converted_params = list(transformer_model.parameters())
    reference_params = list(transformer_model2.parameters())

    # zip() stops at the shorter input, so without this check a converted
    # model that lost (or gained) parameters would still pass.
    assert len(converted_params) == len(reference_params)

    # compare weights
    for p1, p2 in zip(converted_params, reference_params):
        # ne() broadcasts, so make sure the shapes really match first.
        assert p1.data.shape == p2.data.shape
        assert (p1.data.ne(p2.data).sum() == 0)
def convert_to_transformers(self):
    """Convert this model into an equivalent transformers model.

    Supported prediction head types are ``"span_classification"`` (converted
    to ``AutoModelForQuestionAnswering``) and ``"text_classification"``
    (converted to ``AutoModelForSequenceClassification``). The language model
    is attached under the new model's ``base_model_prefix`` and the weights
    of the head's first feed-forward layer are loaded into the output layer.

    Note that for text classification the language model's config object is
    updated in place with label and task information.

    Returns:
        The transformers model built from the language model and the head.

    Raises:
        ValueError: If the model does not have exactly one prediction head.
        NotImplementedError: If the head type is not supported.
    """
    if len(self.prediction_heads) != 1:
        raise ValueError(
            f"Currently conversion only works for models with a SINGLE prediction head. "
            f"Your model has {len(self.prediction_heads)}")

    head = self.prediction_heads[0]

    #TODO add more infos to config
    if head.model_type == "span_classification":
        # init model
        transformers_model = AutoModelForQuestionAnswering.from_config(
            self.language_model.model.config)
        # transfer weights for language model + prediction head
        setattr(transformers_model, transformers_model.base_model_prefix,
                self.language_model.model)
        transformers_model.qa_outputs.load_state_dict(
            head.feed_forward.feed_forward[0].state_dict())
    elif head.model_type == "text_classification":
        # add more info to config
        config = self.language_model.model.config
        config.id2label = dict(enumerate(head.label_list))
        config.label2id = {
            label: idx for idx, label in enumerate(head.label_list)
        }
        config.finetuning_task = "text_classification"
        config.language = self.language_model.language
        # Size the classifier from the label list; without this the config's
        # own num_labels decides the output size and loading the head weights
        # can fail with a shape mismatch.
        config.num_labels = len(head.label_list)

        # init model
        transformers_model = AutoModelForSequenceClassification.from_config(
            config)
        # transfer weights for language model + prediction head
        setattr(transformers_model, transformers_model.base_model_prefix,
                self.language_model.model)
        transformers_model.classifier.load_state_dict(
            head.feed_forward.feed_forward[0].state_dict())
    else:
        raise NotImplementedError(
            f"FARM -> Transformers conversion is not supported yet for"
            f" prediction heads of type {head.model_type}")

    return transformers_model
def test_conversion_adaptive_model(caplog):
    """Round-trip a QA checkpoint through AdaptiveModel and compare weights.

    The checkpoint is loaded as an ``AdaptiveModel``, converted back to a
    transformers model and compared parameter by parameter against the same
    checkpoint loaded directly with ``AutoModelForQuestionAnswering``.

    Args:
        caplog: Log-capture object; when truthy its level is raised to
            ``logging.CRITICAL`` to silence lower-level messages.
    """
    if caplog:
        caplog.set_level(logging.CRITICAL)

    model = AdaptiveModel.convert_from_transformers(
        "deepset/bert-base-cased-squad2",
        device="cpu",
        task_type="question_answering")
    transformer_model = model.convert_to_transformers()
    transformer_model2 = AutoModelForQuestionAnswering.from_pretrained(
        "deepset/bert-base-cased-squad2")

    converted_params = list(transformer_model.parameters())
    reference_params = list(transformer_model2.parameters())

    # zip() stops at the shorter input, so without this check a converted
    # model that lost (or gained) parameters would still pass.
    assert len(converted_params) == len(reference_params)

    # compare weights
    for p1, p2 in zip(converted_params, reference_params):
        # ne() broadcasts, so make sure the shapes really match first.
        assert p1.data.shape == p2.data.shape
        assert (p1.data.ne(p2.data).sum() == 0)
def convert_to_transformers(self):
    """Convert this model into an equivalent transformers model.

    Supported prediction head types and their targets:

    * ``"span_classification"`` -> ``AutoModelForQuestionAnswering``
    * ``"language_modelling"`` -> ``AutoModelWithLMHead``
    * ``"text_classification"`` -> ``AutoModelForSequenceClassification``
    * ``"token_classification"`` -> ``AutoModelForTokenClassification``

    The language model is attached under the new model's
    ``base_model_prefix`` and the head weights are loaded into the matching
    output layer. For the classification heads the language model's config
    object is updated in place with label and task information.

    Returns:
        The transformers model built from the language model and the head.

    Raises:
        ValueError: If the model does not have exactly one prediction head,
            or if that head's ``layer_dims`` does not have exactly two
            entries.
        NotImplementedError: If the head type is not supported, or for text
            classification on a language model whose ``base_model_prefix``
            is ``"roberta"``.
    """
    if len(self.prediction_heads) != 1:
        raise ValueError(
            f"Currently conversion only works for models with a SINGLE prediction head. "
            f"Your model has {len(self.prediction_heads)}")

    head = self.prediction_heads[0]
    if len(head.layer_dims) != 2:
        raise ValueError(
            f"Currently conversion only works for PredictionHeads that are a single layer Feed Forward NN with dimensions [LM_output_dim, number_classes].\n"
            f" Your PredictionHead has {str(head.layer_dims)} dimensions.")

    #TODO add more infos to config
    if head.model_type == "span_classification":
        # init model
        transformers_model = AutoModelForQuestionAnswering.from_config(
            self.language_model.model.config)
        # transfer weights for language model + prediction head
        setattr(transformers_model, transformers_model.base_model_prefix,
                self.language_model.model)
        transformers_model.qa_outputs.load_state_dict(
            head.feed_forward.feed_forward[0].state_dict())

    elif head.model_type == "language_modelling":
        # init model
        transformers_model = AutoModelWithLMHead.from_config(
            self.language_model.model.config)
        # transfer weights for language model + prediction head
        setattr(transformers_model, transformers_model.base_model_prefix,
                self.language_model.model)
        # The target module expects these four entries under a "transform."
        # prefix, so they are renamed before loading.
        ph_state_dict = head.state_dict()
        for key in ("dense.weight", "dense.bias",
                    "LayerNorm.weight", "LayerNorm.bias"):
            ph_state_dict[f"transform.{key}"] = ph_state_dict.pop(key)
        transformers_model.cls.predictions.load_state_dict(ph_state_dict)
        logger.warning(
            "Currently only the Masked Language Modeling component of the prediction head is converted, "
            "not the Next Sentence Prediction or Sentence Order Prediction components"
        )

    elif head.model_type == "text_classification":
        if self.language_model.model.base_model_prefix == "roberta":
            # Classification Heads in transformers have different architecture across Language Model variants
            # The RobertaClassificationhead has components: input2dense, dropout, tanh, dense2output
            # The tanh function cannot be mapped to current FARM style linear Feed Forward ClassificationHeads.
            # So conversion for this type cannot work. We would need a compatible FARM RobertaClassificationHead
            message = "Conversion for Text Classification with Roberta or XLMRoberta not possible at the moment."
            logger.error(message)
            # Carry the reason in the exception too, so callers that catch it
            # (or only see the traceback) know why the conversion failed.
            raise NotImplementedError(message)

        # add more info to config
        config = self.language_model.model.config
        config.id2label = dict(enumerate(head.label_list))
        config.label2id = {
            label: idx for idx, label in enumerate(head.label_list)
        }
        config.finetuning_task = "text_classification"
        config.language = self.language_model.language
        config.num_labels = head.num_labels

        # init model
        transformers_model = AutoModelForSequenceClassification.from_config(
            config)
        # transfer weights for language model + prediction head
        setattr(transformers_model, transformers_model.base_model_prefix,
                self.language_model.model)
        transformers_model.classifier.load_state_dict(
            head.feed_forward.feed_forward[0].state_dict())

    elif head.model_type == "token_classification":
        # add more info to config
        config = self.language_model.model.config
        config.id2label = dict(enumerate(head.label_list))
        config.label2id = {
            label: idx for idx, label in enumerate(head.label_list)
        }
        config.finetuning_task = "token_classification"
        config.language = self.language_model.language
        config.num_labels = head.num_labels

        # init model
        transformers_model = AutoModelForTokenClassification.from_config(
            config)
        # transfer weights for language model + prediction head
        setattr(transformers_model, transformers_model.base_model_prefix,
                self.language_model.model)
        transformers_model.classifier.load_state_dict(
            head.feed_forward.feed_forward[0].state_dict())

    else:
        raise NotImplementedError(
            f"FARM -> Transformers conversion is not supported yet for"
            f" prediction heads of type {head.model_type}")

    return transformers_model
import argparse
import os
import shutil

from transformers.modeling_auto import AutoModelForQuestionAnswering
from transformers.tokenization_auto import AutoTokenizer

# Export a question-answering checkpoint into ./model and ./tokenizer.
parser = argparse.ArgumentParser()
# Without a model identifier nothing can be loaded, so fail with a usage
# message instead of passing None on to from_pretrained().
parser.add_argument("--model", required=True)
args = parser.parse_args()

model = AutoModelForQuestionAnswering.from_pretrained(args.model)
tokenizer = AutoTokenizer.from_pretrained(args.model)

# exist_ok lets the script be re-run over an earlier export instead of
# crashing with FileExistsError.
os.makedirs("model", exist_ok=True)
os.makedirs("tokenizer", exist_ok=True)

model.save_pretrained("model")
tokenizer.save_pretrained("tokenizer")

# Place a copy of the model config next to the tokenizer files.
shutil.copyfile("model/config.json", "tokenizer/config.json")
def convert_to_transformers(self):
    """Convert this model into an equivalent transformers model.

    Supported prediction head types and their targets:

    * ``"span_classification"`` -> ``AutoModelForQuestionAnswering``
    * ``"language_modelling"`` -> ``AutoModelWithLMHead``
    * ``"text_classification"`` -> ``AutoModelForSequenceClassification``
    * ``"token_classification"`` -> ``AutoModelForTokenClassification``

    The language model is attached under the new model's
    ``base_model_prefix`` and the head weights are loaded into the matching
    output layer. For the classification heads the language model's config
    object is updated in place with label and task information.

    Returns:
        The transformers model built from the language model and the head.

    Raises:
        ValueError: If the model does not have exactly one prediction head.
        NotImplementedError: If the head type is not supported.
    """
    if len(self.prediction_heads) != 1:
        raise ValueError(
            f"Currently conversion only works for models with a SINGLE prediction head. "
            f"Your model has {len(self.prediction_heads)}")

    head = self.prediction_heads[0]

    #TODO add more infos to config
    if head.model_type == "span_classification":
        # init model
        transformers_model = AutoModelForQuestionAnswering.from_config(
            self.language_model.model.config)
        # transfer weights for language model + prediction head
        setattr(transformers_model, transformers_model.base_model_prefix,
                self.language_model.model)
        transformers_model.qa_outputs.load_state_dict(
            head.feed_forward.feed_forward[0].state_dict())

    elif head.model_type == "language_modelling":
        # init model
        transformers_model = AutoModelWithLMHead.from_config(
            self.language_model.model.config)
        # transfer weights for language model + prediction head
        setattr(transformers_model, transformers_model.base_model_prefix,
                self.language_model.model)
        # The target module expects these four entries under a "transform."
        # prefix, so they are renamed before loading.
        ph_state_dict = head.state_dict()
        for key in ("dense.weight", "dense.bias",
                    "LayerNorm.weight", "LayerNorm.bias"):
            ph_state_dict[f"transform.{key}"] = ph_state_dict.pop(key)
        transformers_model.cls.predictions.load_state_dict(ph_state_dict)
        logger.warning(
            "Currently only the Masked Language Modeling component of the prediction head is converted, "
            "not the Next Sentence Prediction or Sentence Order Prediction components"
        )

    elif head.model_type in ("text_classification", "token_classification"):
        # Both classification heads are converted the same way; only the
        # task name (identical to the head's model_type) and the target
        # Auto class differ.
        # add more info to config
        config = self.language_model.model.config
        config.id2label = dict(enumerate(head.label_list))
        config.label2id = {
            label: idx for idx, label in enumerate(head.label_list)
        }
        config.finetuning_task = head.model_type
        config.language = self.language_model.language
        config.num_labels = head.num_labels

        # init model
        if head.model_type == "text_classification":
            transformers_model = AutoModelForSequenceClassification.from_config(
                config)
        else:
            transformers_model = AutoModelForTokenClassification.from_config(
                config)
        # transfer weights for language model + prediction head
        setattr(transformers_model, transformers_model.base_model_prefix,
                self.language_model.model)
        transformers_model.classifier.load_state_dict(
            head.feed_forward.feed_forward[0].state_dict())

    else:
        raise NotImplementedError(
            f"FARM -> Transformers conversion is not supported yet for"
            f" prediction heads of type {head.model_type}")

    return transformers_model
import time import torch import Bert_Embeddings as be from transformers import pipeline from flask import Flask, redirect, url_for, request, render_template, jsonify from transformers.modeling_auto import AutoModelForQuestionAnswering from transformers.tokenization_auto import AutoTokenizer app = Flask(__name__) tokenizer = AutoTokenizer.from_pretrained( "bert-large-uncased-whole-word-masking-finetuned-squad") model = AutoModelForQuestionAnswering.from_pretrained( "bert-large-uncased-whole-word-masking-finetuned-squad") qa_pipeline = pipeline( "question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad", tokenizer="bert-large-uncased-whole-word-masking-finetuned-squad") path = 'source_sample_tesla.txt' corpus_embeddings, embedder, corpus, sentences, para = be.load_model(path) def sim_sent(query): start = time.time() queries = [query] results = be.ComputeSim(corpus_embeddings, embedder, queries) text = [] for idx in results: text.append(corpus[idx])