Ejemplo n.º 1
0
 def create_model(self, transformer="longformer"):
     """Load a pretrained question-answering model into ``self.model``.

     Args:
         transformer: Key selecting which architecture/checkpoint pair to
             load. The supported keys are the keys of ``supported`` below.

     Returns:
         None. For a supported key, ``self.model`` is set to the loaded
         model. For any other value nothing is loaded, ``self.model`` is
         left untouched, and a hint listing the valid keys is printed.
     """
     # choice -> (class name inside ``transformers``, pretrained checkpoint id)
     supported = {
         "distilbert": ("DistilBertForQuestionAnswering",
                        "distilbert-base-uncased"),
         "bert": ("BertForQuestionAnswering",
                  "bert-base-uncased"),
         "roberta": ("RobertaForQuestionAnswering",
                     "roberta-base"),
         "roberta_squad": ("RobertaForQuestionAnswering",
                           "deepset/roberta-base-squad2"),
         "longformer": ("LongformerForQuestionAnswering",
                        "allenai/longformer-base-4096"),
         "bart": ("BartForQuestionAnswering",
                  "facebook/bart-base"),
         "electra": ("ElectraForQuestionAnswering",
                     "google/electra-small-discriminator"),
     }

     # A non-string can never name a supported model; checking the type
     # first keeps unhashable arguments on the "not available" path.
     spec = supported.get(transformer) if isinstance(transformer, str) else None
     if spec is None:
         print(
             "The model you chose is not available in this version. You can try to manually change the code or manually overwrite the variable self.model"
         )
         # Built from the table so the hint always lists every supported
         # key, including 'roberta_squad'.
         print("The available choices are {} ".format(
             " , ".join("'{}'".format(name) for name in supported)))
         return

     # Imported lazily so an unsupported choice never needs transformers.
     import transformers

     class_name, checkpoint = spec
     self.model = getattr(transformers, class_name).from_pretrained(checkpoint)
Ejemplo n.º 2
0
def configure_tokenizer_model_roberta(args, logger, is_preprocess=False):
    """Build the RoBERTa tokenizer and model described by ``args``.

    Args:
        args: Object providing ``config_name``, ``do_lower_case`` and
            ``init_dir``; ``model_name_or_path`` is additionally read when
            ``is_preprocess`` is true.
        logger: Logger whose ``info`` method receives progress messages.
        is_preprocess: When true, the model is loaded with ``AutoModel``
            from ``args.model_name_or_path``; otherwise it is loaded with
            ``RobertaForQuestionAnswering`` from ``args.init_dir``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    logger.info("***** Loading tokenizer *****")
    roberta_tokenizer = RobertaTokenizer.from_pretrained(
        args.config_name,
        do_lower_case=args.do_lower_case,
        cache_dir=args.init_dir,
    )

    logger.info("***** Loading configuration *****")
    roberta_config = RobertaConfig.from_pretrained(
        args.config_name,
        cache_dir=args.init_dir,
    )

    logger.info("Loading pretrained model from {}".format(args.init_dir))

    # Both branches share the config and cache directory; only the loader
    # class and the location of the weights differ.
    if is_preprocess:
        loader, weights_source = AutoModel, args.model_name_or_path
    else:
        loader, weights_source = RobertaForQuestionAnswering, args.init_dir

    loaded_model = loader.from_pretrained(
        weights_source,
        config=roberta_config,
        cache_dir=args.init_dir,
    )
    return roberta_tokenizer, loaded_model
 def __init__(self, hparams):
     """Initialize the parent class, then load the QA network.

     Args:
         hparams: Hyperparameter container, forwarded unchanged to the
             parent initializer.

     NOTE(review): ``self.hparams`` is read right after the parent
     initializer runs, so the parent presumably stores ``hparams`` there;
     confirm against the base class. Its ``model`` attribute is handed to
     ``from_pretrained`` as the checkpoint to load.
     """
     super().__init__(hparams)
     checkpoint = self.hparams.model
     self.net = RobertaForQuestionAnswering.from_pretrained(checkpoint)