Example no. 1
0
def load_untrained_model(args):
    """Build a (config, tokenizer, model) triple for question answering.

    For the synthetic datasets ('simple', 'synth', 'comp') a bespoke
    config/tokenizer pair is instantiated and an untrained
    RobertaForQuestionAnswering is built from it; for 'hpqa'/'squad'
    the HuggingFace Auto* classes load a pretrained checkpoint.

    Args:
        args: namespace providing dataset, config_name, model_name_or_path,
            tokenizer_name, do_lower_case and cache_dir attributes.

    Returns:
        Tuple of (config, tokenizer, model).

    Raises:
        RuntimeError: if args.dataset is not one of the supported names.
    """
    if args.dataset in ['simple', 'synth', 'comp']:
        tokenizer = SimBertTokenizer()
        # Each synthetic dataset has its own config class; 'synth' is the
        # fall-through default of the three membership-tested names.
        if args.dataset == 'simple':
            config = SimBertConfig()
        elif args.dataset == 'comp':
            config = CompBertConfig()
        else:
            config = SynBertConfig()
        model = RobertaForQuestionAnswering(config=config)
    elif args.dataset in ['hpqa', 'squad']:
        # Hoisted: this fallback was previously repeated verbatim in all
        # three from_pretrained calls below.
        cache_dir = args.cache_dir if args.cache_dir else None
        config = AutoConfig.from_pretrained(
            args.config_name if args.config_name else args.model_name_or_path,
            cache_dir=cache_dir,
        )
        tokenizer = AutoTokenizer.from_pretrained(
            args.tokenizer_name
            if args.tokenizer_name else args.model_name_or_path,
            do_lower_case=args.do_lower_case,
            cache_dir=cache_dir,
        )
        model = AutoModelForQuestionAnswering.from_pretrained(
            args.model_name_or_path,
            # TensorFlow checkpoints are detected by their file suffix.
            from_tf=bool(".ckpt" in args.model_name_or_path),
            config=config,
            cache_dir=cache_dir,
        )
    else:
        raise RuntimeError('Dataset not supported')

    return config, tokenizer, model
Example no. 2
0
 def create_model(self, transformer="longformer"):
     """Load a pretrained question-answering model into ``self.model``.

     The requested architecture is looked up in a local table mapping the
     short name to a transformers class and a hub checkpoint id; an
     unknown name leaves ``self.model`` untouched and prints the list of
     supported choices instead.
     """
     # short name -> (transformers class name, HuggingFace hub checkpoint)
     checkpoints = {
         "distilbert": ("DistilBertForQuestionAnswering", "distilbert-base-uncased"),
         "bert": ("BertForQuestionAnswering", "bert-base-uncased"),
         "roberta": ("RobertaForQuestionAnswering", "roberta-base"),
         "roberta_squad": ("RobertaForQuestionAnswering", "deepset/roberta-base-squad2"),
         "longformer": ("LongformerForQuestionAnswering", "allenai/longformer-base-4096"),
         "bart": ("BartForQuestionAnswering", "facebook/bart-base"),
         "electra": ("ElectraForQuestionAnswering", "google/electra-small-discriminator"),
     }
     if transformer in checkpoints:
         # Import lazily, as the original per-branch imports did.
         import transformers
         class_name, checkpoint = checkpoints[transformer]
         model_cls = getattr(transformers, class_name)
         self.model = model_cls.from_pretrained(checkpoint)
     else:
         print(
             "The model you chose is not available in this version. You can try to manually change the code or manually overwrite the variable self.model"
         )
         print(
             "The available choices are 'distilbert' , 'bert' , 'roberta' , 'longformer' , 'bart' , 'electra' "
         )
Example no. 3
0
 def create_and_check_for_question_answering(
     self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
 ):
     """Forward a RobertaForQuestionAnswering in eval mode and assert logit shapes.

     ``sequence_labels`` is fed as both start and end positions;
     ``token_labels`` and ``choice_labels`` are accepted for signature
     parity with the sibling checks but are unused here.
     """
     qa_model = RobertaForQuestionAnswering(config=config)
     qa_model.to(torch_device)
     qa_model.eval()
     forward_kwargs = dict(
         attention_mask=input_mask,
         token_type_ids=token_type_ids,
         start_positions=sequence_labels,
         end_positions=sequence_labels,
     )
     result = qa_model(input_ids, **forward_kwargs)
     # Both span-score tensors must be (batch, sequence_length).
     expected_shape = (self.batch_size, self.seq_length)
     self.parent.assertEqual(result.start_logits.shape, expected_shape)
     self.parent.assertEqual(result.end_logits.shape, expected_shape)
Example no. 4
0
def configure_tokenizer_model_roberta(args, logger, is_preprocess=False):
    """Load a RoBERTa tokenizer, configuration and model for QA.

    When ``is_preprocess`` is true a bare AutoModel is loaded from
    ``args.model_name_or_path``; otherwise a RobertaForQuestionAnswering
    checkpoint is read from ``args.init_dir``. Downloads are cached in
    ``args.init_dir`` in every case.
    """
    logger.info("***** Loading tokenizer *****")
    tokenizer = RobertaTokenizer.from_pretrained(
        args.config_name,
        do_lower_case=args.do_lower_case,
        cache_dir=args.init_dir,
    )

    logger.info("***** Loading configuration *****")
    config = RobertaConfig.from_pretrained(args.config_name, cache_dir=args.init_dir)

    logger.info("Loading pretrained model from {}".format(args.init_dir))

    if is_preprocess:
        model = AutoModel.from_pretrained(
            args.model_name_or_path, config=config, cache_dir=args.init_dir
        )
    else:
        model = RobertaForQuestionAnswering.from_pretrained(
            args.init_dir, config=config, cache_dir=args.init_dir
        )

    return tokenizer, model
Example no. 5
0
 def create_and_check_roberta_for_question_answering(
         self, config, input_ids, token_type_ids, input_mask,
         sequence_labels, token_labels, choice_labels):
     """Forward a QA model and verify both logit tensors are (batch, seq_len).

     ``sequence_labels`` doubles as start and end positions so the model
     also produces a loss, which is checked via ``check_loss_output``.
     """
     model = RobertaForQuestionAnswering(config=config)
     model.to(torch_device)
     model.eval()
     result = model(
         input_ids,
         attention_mask=input_mask,
         token_type_ids=token_type_ids,
         start_positions=sequence_labels,
         end_positions=sequence_labels,
     )
     expected = [self.batch_size, self.seq_length]
     for logits_key in ("start_logits", "end_logits"):
         # Shapes compared as lists to match the legacy assertListEqual API.
         self.parent.assertListEqual(list(result[logits_key].size()), expected)
     self.check_loss_output(result)
 def __init__(self, hparams):
     """Delegate hyperparameter storage to the parent, then load the QA net.

     The checkpoint name is taken from ``self.hparams.model`` (populated
     by the parent constructor from ``hparams``).
     """
     super().__init__(hparams)
     checkpoint = self.hparams.model
     self.net = RobertaForQuestionAnswering.from_pretrained(checkpoint)