# Build a question-answering pipeline from any extractive-QA checkpoint.
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline


def transformer_models(model_name):
    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    qa_pipeline = pipeline('question-answering', model=model, tokenizer=tokenizer)
    return qa_pipeline
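# Usage sketch (the checkpoint name here is an assumption; any Hub model
# with a span-prediction head works the same way):
qa = transformer_models("distilbert-base-cased-distilled-squad")
result = qa(question="Who wrote Hamlet?",
            context="Hamlet is a tragedy written by William Shakespeare.")
print(result["answer"], result["score"])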
def __init__(self, CONFIG):
    super().__init__()
    self.CONFIG = CONFIG
    self.modelpath = "ahotrod/roberta_large_squad2"
    self.robertaconfig = AutoConfig.from_pretrained(self.modelpath)
    # expose every layer's hidden states so they can be pooled below
    self.robertaconfig.output_hidden_states = True
    # keep only the RoBERTa encoder from the fine-tuned QA checkpoint
    self.roberta = AutoModelForQuestionAnswering.from_pretrained(
        self.modelpath,
        config=self.robertaconfig,
    )._modules['roberta']
    self.drop_out = nn.Dropout(p=self.CONFIG['DROPOUT'])
    self.high_dropout = nn.Dropout(p=self.CONFIG['HIGH_DROPOUT'])
    # one learnable pooling weight per hidden layer (+1 for the embedding
    # output); the first MAIN_LAYERS weights start at -3 so the softmax
    # initially favors the remaining (top) layers
    n_weights = self.robertaconfig.num_hidden_layers + 1
    weights_init = torch.zeros(n_weights).float()
    weights_init.data[:self.CONFIG['MAIN_LAYERS']] = -3
    self.layer_weights = torch.nn.Parameter(weights_init)
    if self.CONFIG['QA_TASK']:
        if not self.CONFIG['ADD_TOKEN_LOSS']:
            self.classifier = nn.Linear(self.robertaconfig.hidden_size, 2)
        else:
            # third output for an auxiliary per-token loss
            self.classifier = nn.Linear(self.robertaconfig.hidden_size, 3)
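# A minimal forward() sketch for the module above, assuming the hidden states
# are pooled with softmax(self.layer_weights); the snippet does not include
# its forward, so this only illustrates the layer-weighting idea (it assumes
# tuple outputs as in older transformers releases, where outputs[2] holds the
# per-layer hidden states):
def forward(self, input_ids, attention_mask):
    outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
    hidden_states = torch.stack(outputs[2], dim=0)           # (L+1, B, T, H)
    weights = torch.softmax(self.layer_weights, dim=0)       # (L+1,)
    pooled = (weights[:, None, None, None] * hidden_states).sum(dim=0)
    logits = self.classifier(self.drop_out(pooled))          # (B, T, 2 or 3)
    start_logits, end_logits = logits[..., 0], logits[..., 1]
    return start_logits, end_logits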
def __init__(self, CONFIG):
    super().__init__()
    self.CONFIG = CONFIG
    self.modelpath = "ahotrod/roberta_large_squad2"
    self.robertaconfig = AutoConfig.from_pretrained(self.modelpath)
    self.robertaconfig.output_hidden_states = True
    # load the checkpoint once, then reuse both its encoder and its
    # pretrained span-prediction head
    qa_model = AutoModelForQuestionAnswering.from_pretrained(
        self.modelpath,
        config=self.robertaconfig,
    )
    self.roberta = qa_model._modules['roberta']
    self.dropout = nn.Dropout(p=self.CONFIG['DROPOUT'])
    self.l1 = qa_model._modules['qa_outputs']
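# Forward sketch for the module above (an assumption: the forward is not
# shown in the snippet). The reused qa_outputs head is a single Linear that
# maps each token's hidden state to start/end logits, mirroring
# RobertaForQuestionAnswering with dropout added in between:
def forward(self, input_ids, attention_mask):
    sequence_output = self.roberta(input_ids=input_ids,
                                   attention_mask=attention_mask)[0]
    logits = self.l1(self.dropout(sequence_output))          # (B, T, 2)
    start_logits, end_logits = logits.split(1, dim=-1)
    return start_logits.squeeze(-1), end_logits.squeeze(-1)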
def __init__(self):
    model = AutoModelForQuestionAnswering.from_pretrained("/model/model")
    tokenizer = AutoTokenizer.from_pretrained("/model/tokenizer")
    self.default_response = "Perhaps the answer is 42."
    self.predictor = pipeline("question-answering", model=model, tokenizer=tokenizer)
    with open("/mounts/bert_context/paragraph.txt") as f:
        self.context = f.read()
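# A hypothetical predict() to pair with the handler above; only __init__ is
# shown in the snippet, so the method name and the 0.3 confidence threshold
# are assumptions:
def predict(self, question):
    result = self.predictor(question=question, context=self.context)
    # fall back to the canned default when the model is not confident
    return result["answer"] if result["score"] > 0.3 else self.default_response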
def test_conversion_adaptive_model_qa():
    farm_model = Converter.convert_from_transformers(
        "deepset/bert-base-cased-squad2", device="cpu")
    transformer_model = farm_model.convert_to_transformers()[0]
    transformer_model2 = AutoModelForQuestionAnswering.from_pretrained(
        "deepset/bert-base-cased-squad2")
    # compare weights
    for p1, p2 in zip(transformer_model.parameters(),
                      transformer_model2.parameters()):
        assert p1.data.ne(p2.data).sum() == 0
def test_conversion_adaptive_model(caplog):
    if caplog:
        caplog.set_level(logging.CRITICAL)
    model = AdaptiveModel.convert_from_transformers(
        "deepset/bert-base-cased-squad2", device="cpu",
        task_type="question_answering")
    transformer_model = model.convert_to_transformers()
    transformer_model2 = AutoModelForQuestionAnswering.from_pretrained(
        "deepset/bert-base-cased-squad2")
    # compare weights
    for p1, p2 in zip(transformer_model.parameters(),
                      transformer_model2.parameters()):
        assert p1.data.ne(p2.data).sum() == 0
import argparse
import os
import shutil

from transformers import AutoModelForQuestionAnswering, AutoTokenizer

parser = argparse.ArgumentParser()
parser.add_argument("--model")
args = parser.parse_args()

model = AutoModelForQuestionAnswering.from_pretrained(args.model)
tokenizer = AutoTokenizer.from_pretrained(args.model)

os.makedirs("model", exist_ok=True)
os.makedirs("tokenizer", exist_ok=True)
model.save_pretrained("model")
tokenizer.save_pretrained("tokenizer")
# some tokenizer loaders expect a config.json next to the tokenizer files
shutil.copyfile("model/config.json", "tokenizer/config.json")
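# Example invocation (the script name and checkpoint are placeholders; any
# Hub checkpoint with a QA head works), producing ./model and ./tokenizer
# directories ready for packaging:
#   python export_qa_model.py --model deepset/bert-base-cased-squad2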
import time

import torch
import Bert_Embeddings as be
from flask import Flask, redirect, url_for, request, render_template, jsonify
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

app = Flask(__name__)

tokenizer = AutoTokenizer.from_pretrained(
    "bert-large-uncased-whole-word-masking-finetuned-squad")
model = AutoModelForQuestionAnswering.from_pretrained(
    "bert-large-uncased-whole-word-masking-finetuned-squad")
# reuse the objects loaded above instead of fetching the checkpoint again
qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)

path = 'source_sample_tesla.txt'
corpus_embeddings, embedder, corpus, sentences, para = be.load_model(path)


def sim_sent(query):
    start = time.time()
    queries = [query]
    results = be.ComputeSim(corpus_embeddings, embedder, queries)
    # collect the corpus sentences most similar to the query
    text = []
    for idx in results:
        text.append(corpus[idx])
    return text
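# Sketch of how the two pieces combine (Bert_Embeddings' API and the Flask
# routes are not shown, so this route, its path, and the request field are
# assumptions): retrieve the most similar sentences, then extract a span.
@app.route("/qa", methods=["POST"])
def answer():
    query = request.json["question"]
    context = " ".join(sim_sent(query))
    result = qa_pipeline(question=query, context=context)
    return jsonify(answer=result["answer"], score=result["score"])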