Example #1
def load_and_predict(data_dir, model_type, pretrain_model):
    if model_type == 'bert_japanese':
        model = BertForQuestionAnswering.from_pretrained(
            'cl-tohoku/bert-base-japanese')
        tokenizer = BertJapaneseTokenizer.from_pretrained(
            'cl-tohoku/bert-base-japanese')

    if model_type == 'bert_multilingual':
        model = BertForQuestionAnswering.from_pretrained(
            'bert-base-multilingual-cased')
        tokenizer = BertTokenizer.from_pretrained(
            'bert-base-multilingual-cased', tokenize_chinese_chars=False)

    if model_type == 'albert':
        model = AlbertForQuestionAnswering.from_pretrained(
            'ALINEAR/albert-japanese-v2')
        tokenizer = AlbertTokenizer.from_pretrained(
            'ALINEAR/albert-japanese-v2')

    test_data = TestData(data_dir, TAG)
    testset = QADataset(test_data.examples, "test", tokenizer=tokenizer)
    testloader = DataLoader(testset, batch_size=4, collate_fn=collate_fn)

    model = model.to(device)
    model.load_state_dict(torch.load(pretrain_model))

    prediction = predict(model, testloader, device, tokenizer)
    prediction = func(data_dir, prediction)
    print('finish loading and predicting from {}!'.format(pretrain_model))
    return prediction  #prediction dictionary
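A hedged usage sketch for load_and_predict; the paths are placeholders, and TAG, TestData, QADataset, collate_fn, predict, func and device are assumed to be defined elsewhere in the original project.

predictions = load_and_predict(
    data_dir='data/',                          # hypothetical data directory
    model_type='bert_japanese',
    pretrain_model='weights/best_model.pt')    # hypothetical checkpoint path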
Example #2
def main():
    # Set seed
    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Setup CUDA, GPU & distributed training
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    config = BertConfig.from_pretrained('bert-base-cased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=True)
    # The BertForQuestionAnswering module adds a single untrained head, qa_outputs: Linear(hidden_size, 2), on top of the pretrained BERT-base encoder.
    model = BertForQuestionAnswering.from_pretrained('bert-base-cased', config=config)

    model.to(device)

    max_seq_length=384

    train_dataset = load_and_cache_examples(tokenizer, is_training=True)[0]

    # Training
    global_step, ave_loss = train(train_dataset, model, tokenizer)
    print(" global_step = %s, average loss = %s", global_step, tr_loss / global_step)

    # Save the trained model and the tokenizer
    output_dir = 'output/'

    # Create output directory if needed
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print("Saving model checkpoint to %s", output_dir)
    # Save a trained model, configuration and tokenizer using `save_pretrained()`.
    # They can then be reloaded using `from_pretrained()`
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

    # Load a trained model and vocabulary that you have fine-tuned
    model = BertForQuestionAnswering.from_pretrained(output_dir)
    tokenizer = BertTokenizer.from_pretrained(output_dir, do_lower_case=True)
    model.to(device)

    # Evaluate
    results = evaluate(model, tokenizer)
    print("Results: {}".format(results))

    return results
Example #3
def load_qa_model():
    model = BertForQuestionAnswering.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')
    #Tokenizer
    tokenizer = BertTokenizer.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')
    return model, tokenizer
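A minimal inference sketch built on the loader above; the question and context strings are illustrative, and it assumes a transformers version whose model call returns an output object with start_logits/end_logits.

import torch

model, tokenizer = load_qa_model()
model.eval()

question = "Who developed BERT?"    # illustrative only
context = "BERT was developed by researchers at Google and released in 2018."

inputs = tokenizer(question, context, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

start = torch.argmax(outputs.start_logits)
end = torch.argmax(outputs.end_logits) + 1
print(tokenizer.decode(inputs["input_ids"][0][start:end]))
# expected to print something like: "researchers at google"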
Example #4
def download_model():
    if (not Path("model_downloaded").is_file()
            or not Path("usecase_indicator.h5").is_file()):

        url = "https://b0ykepubbucket.s3-eu-west-1.amazonaws.com/usecase_indicator.h5"
        r = requests.get(url, stream=True)
        chunk_progress = 0
        with open("usecase_indicator.h5", "wb") as modelfile:
            for chunk in r.iter_content(chunk_size=8388608):
                if chunk:
                    modelfile.write(chunk)
                    chunk_progress += 1
                    print(
                        f"Downloading model 1/2 in background: {chunk_progress*8}MB"
                    )
                    sys.stdout.flush()
            else:
                # for/else: runs only when the download loop finishes without break,
                # i.e. after all chunks have been written
                open("model_downloaded", "w").close()

    if (not Path("modelqna_downloaded").is_file()
            or not Path("./BertLSquad/pytorch_model.bin").is_file()):
        print(f"Started model 2/2 download in background")
        sys.stdout.flush()
        model = BertForQuestionAnswering.from_pretrained(
            'bert-large-uncased-whole-word-masking-finetuned-squad')
        model.save_pretrained("./BertLSquad")
        open("modelqna_downloaded", "w").close()
        print("Model 2/2 download completed")
        sys.stdout.flush()

    return
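download_model() saves the QA weights with save_pretrained("./BertLSquad") but not the tokenizer; a minimal sketch of loading both afterwards (fetching the tokenizer from the Hub here is an assumption, not something the original code does).

from transformers import BertForQuestionAnswering, BertTokenizer

qa_model = BertForQuestionAnswering.from_pretrained("./BertLSquad")
qa_tokenizer = BertTokenizer.from_pretrained(
    "bert-large-uncased-whole-word-masking-finetuned-squad")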
Example #5
def train():
    with msg.loading("   Loading BERT"):
        TOKENIZER = BertTokenizer.from_pretrained('bert-base-uncased')
        MODEL = BertForQuestionAnswering.from_pretrained(
            'bert-large-uncased-whole-word-masking-finetuned-squad')
    msg.good("   BERT loaded")

    articles_dir = os.path.join(SCRIPT_PATH,
                                '../data/raw/CORD-19-research-challenge/')
    articles_folders = [
        'biorxiv_medrxiv/biorxiv_medrxiv/pdf_json/',
        'comm_use_subset/comm_use_subset/pdf_json/',
        'comm_use_subset/comm_use_subset/pmc_json/',
        'noncomm_use_subset/noncomm_use_subset/pdf_json/',
        'noncomm_use_subset/noncomm_use_subset/pmc_json/',
        'custom_license/custom_license/pdf_json/',
        'custom_license/custom_license/pmc_json/'
    ]
    meta_path = os.path.join(articles_dir, 'metadata.csv')

    with msg.loading("   Loading publications"):
        start = time.time()
        data_text, index2paperID, index2paperPath = get_data_texts(
            articles_dir, articles_folders, meta_path)
    msg.good("   Publications loaded - Took {:.2f}s".format(time.time() -
                                                            start))

    covid_q = QuestionCovid(TOKENIZER, MODEL, index2paperID, index2paperPath)
    covid_q.fit(data_text)
    return covid_q
Example #6
 def _get_question_answering(self):
     """
     Initializes the BertForQuestionAnswering transformer.
     NOTE: This uses the bert-large-uncased-whole-word-masking-finetuned-squad checkpoint (fine-tuned on SQuAD) for best results.
     """
     self.qa = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
     self.qa.eval()
Example #7
 def __init__(self):
     self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
     self.model_name = "nyust-eb210/braslab-bert-drcd-384"
     self.tokenizer = BertTokenizerFast.from_pretrained(self.model_name)
     self.model = BertForQuestionAnswering.from_pretrained(self.model_name).to(
         self.device
     )
Example #8
def answergen_bert(context, question):
    tokenizer = BertTokenizer.from_pretrained(
        'csarron/bert-base-uncased-squad-v1')
    model = BertForQuestionAnswering.from_pretrained(
        'csarron/bert-base-uncased-squad-v1')
    #tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad',return_token_type_ids = True)
    #model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

    encoding = tokenizer.encode_plus(question, context)

    input_ids, attention_mask = encoding["input_ids"], encoding[
        "attention_mask"]

    # return_dict=False makes the model return a plain (start_logits, end_logits) tuple
    start_scores, end_scores = model(torch.tensor([input_ids]),
                                     attention_mask=torch.tensor([attention_mask]),
                                     return_dict=False)

    # argmax over positions 1: skips the [CLS] logit; add 1 to map back to input_ids indices
    answer_start = torch.argmax(start_scores[0, 1:]) + 1
    answer_end = torch.argmax(end_scores[0, 1:]) + 1
    ans_tokens = input_ids[answer_start:answer_end + 1]
    answer_tokens = tokenizer.convert_ids_to_tokens(ans_tokens)

    print("\nQuestion ", question)
    #print ("\nAnswer Tokens: ")
    #print (answer_tokens)

    answer_tokens_to_string = tokenizer.convert_tokens_to_string(answer_tokens)
    print("\nAnswer : ", answer_tokens_to_string)
    return answer_tokens_to_string
Example #9
def get_answer_using_bert(question, reference_text):
    # Load fine-tuned model for QA
    bert_model = BertForQuestionAnswering.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')

    # Load Vocab as well
    bert_tokenizer = BertTokenizer.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')

    # Apply bert_tokenizer on input text
    input_ids = bert_tokenizer.encode(question, reference_text)
    input_tokens = bert_tokenizer.convert_ids_to_tokens(input_ids)

    # Search index of first [SEP] token
    sep_location = input_ids.index(bert_tokenizer.sep_token_id)
    first_seg_len, second_seg_len = sep_location + 1, len(input_ids) - (
        sep_location + 1)
    seg_embedding = [0] * first_seg_len + [1] * second_seg_len

    # Run our example on model
    model_scores = bert_model(torch.tensor([input_ids]),
                              token_type_ids=torch.tensor([seg_embedding]))
    ans_start_loc, ans_end_loc = torch.argmax(model_scores[0]), torch.argmax(
        model_scores[1])
    result = ' '.join(input_tokens[ans_start_loc:ans_end_loc + 1])

    # Return final result
    result = result.replace(' ##', '')
    return result
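An illustrative call to get_answer_using_bert; the strings are made up and the large SQuAD-fine-tuned checkpoint is downloaded on first use.

reference = "The Eiffel Tower was completed in 1889 and stands in Paris, France."
print(get_answer_using_bert("When was the Eiffel Tower completed?", reference))
# expected output: "1889"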
Example #10
    def __init__(self,
                 qa_path,
                 relations_filepath,
                 data_directory,
                 batch_size,
                 must_choose_answer,
                 device,
                 trained_to_reject,
                 calculate_single_error=True):
        self.trained_to_reject = trained_to_reject
        self.qa_path = qa_path  # path to qa weights
        self.relations_filepath = relations_filepath  # path to relations file
        self.data_directory = data_directory  # data directory path
        self.tokenizer = BertTokenizer.from_pretrained(
            'bert-large-cased')  # tokenizer
        self.model = BertForQuestionAnswering.from_pretrained(
            qa_path)  # Load the model
        self.model.to(device)
        self.device = device

        self.batch_size = batch_size
        # For datasets where there is always an answer, setting this to True ensures that QA
        # models that can return "answer doesn't exist" still return a span from the context.
        self.must_choose_answer = must_choose_answer
        self.total_samples = 0
        if calculate_single_error:
            self.se_list = []
        else:
            self.se_list = None
Example #11
def load_model(model_path):
    model = BertForQuestionAnswering.from_pretrained(model_path)
    model.to(device)
    model.eval()
    model.zero_grad()

    return model
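load_model relies on a module-level device; a minimal sketch of defining it and calling the loader, where the checkpoint directory is a placeholder.

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = load_model("output/")  # hypothetical path to a fine-tuned checkpoint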
Example #12
 def __init__(self, model_configs):
     self.model_configs = model_configs
     self.pretrained_model = BertForQuestionAnswering.from_pretrained(
         self.model_configs['pretrained_model_name'],
         cache_dir=self.model_configs['cache_dir'],
         output_attentions=True)
     self.tokenizer = BertTokenizer.from_pretrained(self.model_configs['tokenizer_name'])
Example #13
async def main(message: types.Message):
    import torch
    model = BertForQuestionAnswering.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')
    tokenizer = BertTokenizer.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')
    Question = 'The purpose of the NewsQA dataset'
    paragrah = 'With massive volumes of written text being produced every second, how do we make sure that we have the most recent and relevant information available to us? Microsoft research Montreal is tackling this problem by building AI systems that can read and comprehend large volumes of complex text in real-time. The purpose of the NewsQA dataset is to help the research community build algorithms that are capable of answering questions requiring human-level comprehension and reasoning skills.'
    encoding = tokenizer.encode_plus(text=Question,
                                     text_pair=paragrah,
                                     add_special_tokens=True)
    # token ids
    inputs = encoding['input_ids']
    # segment (token type) ids
    sentence_embed = encoding['token_type_ids']
    # input tokens
    tokens = tokenizer.convert_ids_to_tokens(inputs)
    start_scores, end_scores = model(input_ids=torch.tensor([inputs]),
                                     token_type_ids=torch.tensor(
                                         [sentence_embed]),
                                     return_dict=False)
    start_index = torch.argmax(start_scores)
    end_index = torch.argmax(end_scores)

    answer = ' '.join(tokens[start_index:end_index + 1])

    await message.reply(text=answer)
Example #14
def train(args):
    model = BertForQuestionAnswering.from_pretrained(PRETRAINED_MODEL_PATH)
    tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_PATH)
    model.resize_token_embeddings(len(tokenizer))
    datasets = CMRC2018(args=args, tokenizer=tokenizer)()

    training_args = TrainingArguments(
        output_dir=args.model_path,
        per_device_train_batch_size=args.batch_size,
        per_device_eval_batch_size=args.batch_size,
        warmup_steps=args.warmup_steps,
        remove_unused_columns=False,
        logging_dir=args.log_path,
        num_train_epochs=args.n_epochs,
        dataloader_num_workers=args.num_workers,
        evaluation_strategy='epoch')
    print(
        f"Train dataset size: {len(datasets['train'])}, Validation dataset size: {len(datasets['validation'])}"
    )
    trainer = Trainer(model=model,
                      args=training_args,
                      train_dataset=datasets['train'],
                      eval_dataset=datasets['validation'])
    trainer.train()
    trainer.save_model()
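train(args) reads a handful of attributes from args; a sketch of building a matching argparse namespace, inferred from the attribute accesses above (the default values are assumptions).

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model_path', default='output/cmrc2018')  # also used as output_dir
parser.add_argument('--log_path', default='logs/')
parser.add_argument('--batch_size', type=int, default=8)
parser.add_argument('--warmup_steps', type=int, default=0)
parser.add_argument('--n_epochs', type=int, default=2)
parser.add_argument('--num_workers', type=int, default=2)

train(parser.parse_args())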
Example #15
 def load_model(self, model_path: str, do_lower_case=False):
     config = BertConfig.from_pretrained(model_path + "/bert_config.json")
     tokenizer = BertTokenizer.from_pretrained(
         model_path, do_lower_case=do_lower_case)
     model = BertForQuestionAnswering.from_pretrained(
         model_path, from_tf=False, config=config)
     return model, tokenizer
Example #16
def nlpQuestion(question):
    print("nlptriggered")
    text = """
    Coronaviruses are a large family of viruses that can cause illness in animals or humans. In humans, several coronaviruses are known to cause respiratory infections ranging from the common cold to more severe diseases such as Middle East Respiratory Syndrome (MERS) and Severe Acute Respiratory Syndrome (SARS). COVID-19 is a virus of the same family with a first recorded outbreak in Wuhan, China, in December 2019. The most common symptoms of COVID-19 are fever, tiredness, and dry cough. Other symptoms include aches and pains, nasal congestion, runny nose, sore throat or diarrhea. These symptoms are usually mild and begin gradually. Some people become infected but don’t develop any symptoms and don't feel unwell. Most people (about 80%) recover from the disease without needing special treatment. Around 1 out of every 6 people who gets COVID-19 becomes seriously ill and develops difficulty breathing. Older people, and those with underlying medical problems like high blood pressure, heart problems or diabetes, are more likely to develop serious illness. People with fever, cough and difficulty breathing should seek medical attention. People can catch COVID-19 from others who have the virus. The disease can spread from person to person through small droplets from the nose or mouth which are spread when a person with COVID-19 coughs or exhales. These droplets land on objects and surfaces around the person. Other people then catch COVID-19 by touching these objects or surfaces, then touching their eyes, nose or mouth. People can also catch COVID-19 if they breathe in droplets from a person with COVID-19 who coughs out or exhales droplets. This is why it is important to stay more than 1 meter (3 feet) away from a person who is sick.
    Studies to date suggest that the virus that causes COVID-19 is mainly transmitted through contact with respiratory droplets rather than through the air. 
    There have been 105000 confirmed cases of coronovirus in the world, with 3100 deaths. There are only 32 confirmed cases in Lebanon. If you are experiencing symptoms, call MOPH on 1214 or 76592699.
    """

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForQuestionAnswering.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')
    print("nlptriggered")

    input_ids = tokenizer.encode(question, text)
    # 0 for the question segment (up to and including the first [SEP]), 1 for the context
    token_type_ids = [
        0 if i <= input_ids.index(tokenizer.sep_token_id) else 1
        for i in range(len(input_ids))
    ]
    # return_dict=False makes the model return a plain (start_logits, end_logits) tuple
    start_scores, end_scores = model(torch.tensor([input_ids]),
                                     token_type_ids=torch.tensor([token_type_ids]),
                                     return_dict=False)
    print("nlptriggered")

    all_tokens = tokenizer.convert_ids_to_tokens(input_ids)
    answer = ' '.join(
        all_tokens[torch.argmax(start_scores):torch.argmax(end_scores) +
                   1]).replace(' ##', '')

    print(answer)
    return answer
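An illustrative call; nlpQuestion only takes the question, since the COVID-19 reference text is hard-coded inside the function.

print(nlpQuestion("What are the most common symptoms of COVID-19?"))
# expected to print something like: "fever , tiredness , and dry cough"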
Example #17
def configure_tokenizer_model_bert(args, logger, is_preprocess=False):
    logger.info("***** Loading tokenizer *****")
    tokenizer = BertTokenizer.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case)

    # logger.info("Loading configuration from {}".format(args.cache_dir))
    logger.info("***** Loading configuration from {} ******".format(
        args.init_dir))
    config = BertConfig.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        cache_dir=args.init_dir)
    config.vocab_size = len(tokenizer.vocab)

    logger.info("***** Loading pretrained model from {} *****".format(
        args.init_dir))
    if is_preprocess:
        model = AutoModel.from_pretrained(args.model_name_or_path,
                                          config=config,
                                          cache_dir=args.init_dir)
    else:
        model = BertForQuestionAnswering.from_pretrained(
            args.init_dir, config=config, cache_dir=args.init_dir)

    return tokenizer, model
Example #18
    def create_graphics(self, url_base, model_card_path):
        pruned_heads = self.checkpoint_info["config"].get("pruned_heads")
        ret = {}
        if pruned_heads is not None:
            pruning_info_plotter = PruningInfoBokehPlotter(
                "pruning_info", self.JS_PATH)
            fig, js, html = pruning_info_plotter.run(layer_count=12,
                                                     pruned_heads=pruned_heads,
                                                     heads_count=12)
            ret["pruning_info"] = dict(js=js, html=html)

        density_plotter = DensityBokehPlotter("density", self.JS_PATH)

        model = BertForQuestionAnswering.from_pretrained(self.git_path)

        fig, js, html = density_plotter.run(model=model,
                                            dest_path=model_card_path /
                                            "images",
                                            url_base=url_base + "/images")
        ret["density_info"] = dict(js=js, html=html)

        from bokeh.io import export_png

        export_png(fig, filename="/tmp/plot.png")

        return ret
Example #19
 def load_model(self):
     config = BertConfig.from_pretrained(self.c_path)
     self.model = BertForQuestionAnswering.from_pretrained(
         'bert-base-uncased', config=config)
     self.model.to(self.device)
     self.model.eval()
     return self.model
Example #20
def answer_question(question, answer_text, model_name=None, tokenizer_name=None):
    """
    Takes a `question` string and an `answer_text` string (which contains the
    answer), and identifies the words within the `answer_text` that are the
    answer.

    Parameters
    ----------
    question : str
    answer_text : str
    model_name : str, optional
    tokenizer_name : str, optional

    Returns
    -------
    answer : str
    """
    # ======== Model & Tokenizer (default: bert-large finetuned squad ver.1)========
    if model_name is None:
        model_name = 'bert-large-uncased-whole-word-masking-finetuned-squad'
    if tokenizer_name is None:
        tokenizer_name = 'bert-large-uncased-whole-word-masking-finetuned-squad'
    
    model = BertForQuestionAnswering.from_pretrained(model_name)
    tokenizer = BertTokenizer.from_pretrained(tokenizer_name)

    # ======== Tokenize ========
    input_ids = tokenizer.encode(question, answer_text)

    # Report how long the input sequence is.
    # print(f"Query has {len(input_ids):,} tokens.\n")

    # ======== Set Segment IDs ========
    sep_index = input_ids.index(tokenizer.sep_token_id)
    num_seg_a = sep_index + 1
    num_seg_b = len(input_ids) - num_seg_a
    segment_ids = [0]*num_seg_a + [1]*num_seg_b

    # There should be a segment_id for every input token.
    assert len(segment_ids) == len(input_ids)

    # ======== Evaluate ========
    # return_dict=False makes the model return a plain (start_logits, end_logits) tuple
    start_scores, end_scores = model(
        torch.tensor([input_ids]),
        token_type_ids=torch.tensor([segment_ids]),
        return_dict=False,
        )

    # ======== Reconstruct Answer ========
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores)

    tokens = tokenizer.convert_ids_to_tokens(input_ids)
    answer = tokens[answer_start]
    for i in range(answer_start + 1, answer_end + 1):
        if tokens[i][0:2] == '##':
            answer += tokens[i][2:]
        else:
            answer += ' ' + tokens[i]

    return answer
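A hedged usage example for answer_question; the strings are illustrative, and the default SQuAD-fine-tuned checkpoint is downloaded on the first call.

context = ("Hugging Face Transformers provides pretrained models for question "
           "answering, and BERT checkpoints fine-tuned on SQuAD are a common choice.")
print(answer_question("What are BERT checkpoints fine-tuned on?", context))
# expected to print something like: "squad"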
Example #21
 def __init__(self, model_dir, cache_dir):
     # assumes the config file and pretrained model already exist in model_dir
     self.config = BertConfig.from_pretrained(model_dir,
                                              cache_dir=cache_dir)
     self.tokenizer = BertTokenizer.from_pretrained(model_dir,
                                                    cache_dir=cache_dir)
     self.model = BertForQuestionAnswering.from_pretrained(
         model_dir, cache_dir=cache_dir)
Example #22
    def __init__(self,
                 model: str = None,
                 lowercase=True,
                 tokenizer=BertTokenizer):

        self.lowercase = lowercase
        self.tokenizer = tokenizer.from_pretrained(model)
        self.model = BertForQuestionAnswering.from_pretrained(model)
Example #23
 def __init__(
     self,
     pre_trained_name='bert-large-uncased-whole-word-masking-finetuned-squad'
 ):
     self.pre_trained_name = pre_trained_name
     self.model = BertForQuestionAnswering.from_pretrained(
         self.pre_trained_name)
     self.tokenizer = BertTokenizer.from_pretrained(self.pre_trained_name)
Example #24
	def __init__(self):

		# BERT Finetuned on SQUAD

		self.bert_tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
		self.squad_finetuned_model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
		self.squad_finetuned_model = self.squad_finetuned_model.eval()
		self.squad_finetuned_model = self.squad_finetuned_model.to(device)
Example #25
def init_bert():
    global bert_model
    global bert_tokenizer
    from transformers import BertForQuestionAnswering
    from transformers import BertTokenizer
    bert_model = BertForQuestionAnswering.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')
    bert_tokenizer = BertTokenizer.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad')
Example #26
def main():
    parser = get_parser()
    args = parser.parse_args()

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Set device
    args.device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO)
    logging.getLogger("transformers.generation_utils").setLevel(logging.ERROR)

    # Load pretrained question generation model and tokenizer
    GPT2_tokenizer = GPT2Tokenizer.from_pretrained(
        args.question_generation_model, do_lower_case=args.do_lower_case)
    GPT2_model = GPT2LMHeadModel.from_pretrained(
        args.question_generation_model)
    GPT2_model.prepare_inputs_for_generation = prepare_inputs_for_generation
    GPT2_model.eval()
    GPT2_model.to(args.device)

    BERT_tokenizer = BertTokenizer.from_pretrained(
        args.answering_model, do_lower_case=args.do_lower_case)
    BERT_model = BertForQuestionAnswering.from_pretrained(args.answering_model)
    BERT_model.eval()
    BERT_model.to(args.device)

    logging.info("Parameters %s", args)

    # Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum if args.fp16 is set.
    # Otherwise it'll default to "promote" mode, and we'll get fp32 operations. Note that running `--fp16_opt_level="O2"` will
    # remove the need for this code, but it is still valid.
    if args.fp16:
        try:
            from apex import amp
            amp.register_half_function(torch, "einsum")
            GPT2_model = amp.initialize(GPT2_model,
                                        opt_level=args.fp16_opt_level)
            BERT_model = amp.initialize(BERT_model,
                                        opt_level=args.fp16_opt_level)
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )

    generate(args, GPT2_tokenizer, GPT2_model, BERT_tokenizer, BERT_model)
Example #27
def model_pick(id):
  if (id == 0):
    tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
    model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
  if (id == 1):
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad")
    model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad")

  return tokenizer, model
Example #28
 def __init__(
         self,
         pretrained='bert-large-uncased-whole-word-masking-finetuned-squad'
 ):
     self.torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
     self.QA_MODEL = Bert4QA.from_pretrained(pretrained)
     self.QA_MODEL.to(self.torch_device)
     self.QA_MODEL.eval()
     self.QA_TOKENIZER = BertTokenizer.from_pretrained(pretrained)
Example #29
 def __init__(self, args):
     self.args = args
     self.model = BertForQuestionAnswering.from_pretrained(self.args.model_path).to(self.args.device)
     self.tokenizer = BertTokenizer.from_pretrained('hfl/chinese-roberta-wwm-ext')
     self.dataset = CMRC2018(args=args, tokenizer=self.tokenizer)()
     self.validation_dataloader = DataLoader(self.dataset['validation'],
                                             batch_size=self.args.batch_size,
                                             collate_fn=custom_collate,
                                             num_workers=self.args.num_workers)
Example #30
    def load_model(self):
        # Load a pretrained model that has been fine-tuned
        config = BertConfig.from_pretrained(self.model_type, output_hidden_states=True, cache_dir=self.cache_dir)

        pretrained_weights = torch.load(self.model_path, map_location=torch.device(self.device))
        model = BertForQuestionAnswering.from_pretrained(self.model_type,
                                                         state_dict=pretrained_weights,
                                                         config=config,
                                                         cache_dir=self.cache_dir)
        return model