Example #1
    def __init__(self, args, dictionary, left_pad=False):
        super().__init__(dictionary)
        self.dropout = args.dropout

        from pytorch_transformers import RobertaModel, BertModel
        from pytorch_transformers.file_utils import PYTORCH_TRANSFORMERS_CACHE
        from pytorch_transformers import RobertaConfig, RobertaTokenizer, BertConfig, BertTokenizer

        if args.pretrained_bert_model.startswith('roberta'):
            self.embed = RobertaModel.from_pretrained(args.pretrained_bert_model,
                    cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
            self.context = RobertaModel.from_pretrained(args.pretrained_bert_model,
                    cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
            self.config = RobertaConfig.from_pretrained(args.pretrained_bert_model)
            self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

            
        else:
            self.embed = BertModel.from_pretrained(args.pretrained_bert_model,
                    cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
            self.context = BertModel.from_pretrained(args.pretrained_bert_model,
                    cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
            self.config = BertConfig.from_pretrained(args.pretrained_bert_model)
            self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

        self.padding_idx = self.tokenizer.convert_tokens_to_ids(self.tokenizer.pad_token)
Example #2
    def __init__(self, config):
        super(RobertaForMultipleChoice, self).__init__(config)

        self.roberta = RobertaModel(config)
        self.classifier = RobertaClassificationHead(config)

        self.apply(self.init_weights)
Example #3
 def __init__(self):
     super(PretrainedModel, self).__init__()
     self.model = RobertaModel.from_pretrained("roberta-large",
                                               output_hidden_states=True)
     self.config = self.model.config
     for p in self.parameters():
         p.requires_grad = False
Example #4
        def create_and_check_roberta_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels,
                                           token_labels, choice_labels):
            model = RobertaModel(config=config)
            model.eval()
            sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
            sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids)
            sequence_output, pooled_output = model(input_ids)

            result = {
                "sequence_output": sequence_output,
                "pooled_output": pooled_output,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()),
                [self.batch_size, self.seq_length, self.hidden_size])
            self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size])
Example #5
 def __init__(self,
              bert_path,
              bert_name='roberta-base',
              fine_tune=False,
              use_lstm=False,
              num_layers=2,
              bidirectional=False):
     super(QuestionEmbeddingModule, self).__init__()
     self.use_lstm = use_lstm
     self.bert_name = bert_name
     if self.use_lstm:
          # TODO: the number of knowledge-graph entities here needs to be adjusted for the specific knowledge graph in use; the same applies below
         self.question_embed = torch.nn.Sequential(
             torch.nn.Embedding(num_embeddings=50265, embedding_dim=256),
             torch.nn.LSTM(input_size=256,
                           hidden_size=768,
                           num_layers=num_layers,
                           bidirectional=bidirectional,
                           batch_first=True))
     else:
         logger.info(
             'loading pretrained bert model from {}'.format(bert_path +
                                                            bert_name))
         if self.bert_name == 'roberta-base':
             self.question_embed = RobertaModel.from_pretrained(bert_path +
                                                                bert_name)
         elif self.bert_name == 'bert-base-uncased':
             self.question_embed = BertModel.from_pretrained(bert_path +
                                                             bert_name)
         else:
             raise Exception('bert model unspecified!')
         if not fine_tune:
             for param in self.question_embed.parameters():
                 param.requires_grad = False
Example #6
def main():
    best_result = float("-inf")
    logger.info("Loading data...")
    train_itr = DropBatchGen(args, data_mode="train", tokenizer=tokenizer)
    dev_itr = DropBatchGen(args, data_mode="dev", tokenizer=tokenizer)
    num_train_steps = int(args.max_epoch * len(train_itr) /
                          args.gradient_accumulation_steps)
    logger.info("Num update steps {}!".format(num_train_steps))

    logger.info("Build bert model.")
    bert_model = RobertaModel.from_pretrained(args.roberta_model)

    logger.info("Build Drop model.")
    network = NumericallyAugmentedBertNet(
        bert_model,
        hidden_size=bert_model.config.hidden_size,
        dropout_prob=args.dropout,
        use_gcn=args.use_gcn,
        gcn_steps=args.gcn_steps)

    logger.info("Build optimizer etc...")
    model = DropBertModel(args, network, num_train_step=num_train_steps)

    train_start = datetime.now()
    first = True

    for epoch in range(1, args.max_epoch + 1):
        model.avg_reset()
        if not first:
            train_itr.reset()
        first = False
        logger.info('At epoch {}'.format(epoch))
        for step, batch in enumerate(train_itr):
            model.update(batch)
            if model.step % (
                    args.log_per_updates *
                    args.gradient_accumulation_steps) == 0 or model.step == 1:
                logger.info(
                    "Updates[{0:6}] train loss[{1:.5f}] train em[{2:.5f}] f1[{3:.5f}] remaining[{4}]"
                    .format(
                        model.updates, model.train_loss.avg, model.em_avg.avg,
                        model.f1_avg.avg,
                        str((datetime.now() - train_start) / (step + 1) *
                            (num_train_steps - step - 1)).split('.')[0]))
                model.avg_reset()
        total_num, eval_loss, eval_em, eval_f1 = model.evaluate(dev_itr)
        logger.info(
            "Eval {} examples, result in epoch {}, eval loss {}, eval em {} eval f1 {}."
            .format(total_num, epoch, eval_loss, eval_em, eval_f1))

        if eval_f1 > best_result:
            save_prefix = os.path.join(args.save_dir, "checkpoint_best")
            model.save(save_prefix, epoch)
            best_result = eval_f1
            logger.info("Best eval F1 {} at epoch {}".format(
                best_result, epoch))

    logger.info("done training in {} seconds!".format(
        (datetime.now() - train_start).seconds))
Example #7
    def __init__(self, config):
        super(RobertaForRR, self).__init__(config)

        self.num_labels = config.num_labels
        self.roberta = RobertaModel(config)
        self.classifier = RobertaClassificationHead(config)

        self.apply(self.init_weights)
Example #8
 def test_model_from_pretrained(self):
     cache_dir = "/tmp/pytorch_transformers_test/"
     for model_name in list(
             ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
         model = RobertaModel.from_pretrained(model_name,
                                              cache_dir=cache_dir)
         shutil.rmtree(cache_dir)
         self.assertIsNotNone(model)
Example #9
 def __init__(self, config, tie_weights):
     super(RoBertaMCQWeightedSumScore, self).__init__(config)
     self.roberta = RobertaModel(config)
     self._dropout = nn.Dropout(config.hidden_dropout_prob)
     self._classification_layer = nn.Linear(config.hidden_size, 1)
     if tie_weights is True:
         self._weight_layer = self._classification_layer
     else:
         self._weight_layer = nn.Linear(config.hidden_size, 1)
     self.apply(self.init_weights)
Example #10
    def __init__(self, config):
        super(RobertaForRRWithNodeLoss, self).__init__(config)

        self.num_labels = config.num_labels
        self.roberta = RobertaModel(config)
        self.classifier = RobertaClassificationHead(config)
        self.naf_layer = nn.Linear(config.hidden_size, config.hidden_size)
        self.classifier_node = NodeClassificationHead(config)

        self.apply(self.init_weights)
Example #11
    def __init__(self, config, num_choices=1, num_docs_rank=30):
        super(RobertaForMultipleChoice, self).__init__(config)

        self.num_choices = num_choices
        self.roberta = RobertaModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, num_choices)

        self.num_docs_rank = num_docs_rank
        self.apply(self.init_weights)
Example #12
    def __init__(self, model_name_or_path: str, max_seq_length: int = 128, do_lower_case: bool = True):
        super(RoBERTa, self).__init__()
        self.config_keys = ['max_seq_length', 'do_lower_case']
        self.max_seq_length = max_seq_length
        self.do_lower_case = do_lower_case

        self.roberta = RobertaModel.from_pretrained(model_name_or_path)
        self.tokenizer = RobertaTokenizer.from_pretrained(model_name_or_path, do_lower_case=do_lower_case)
        self.cls_token_id = self.tokenizer.convert_tokens_to_ids([self.tokenizer.cls_token])[0]
        self.sep_token_id = self.tokenizer.convert_tokens_to_ids([self.tokenizer.sep_token])[0]
Example #13
 def __init__(self, config):
     super(RoBertaMCQMAC, self).__init__(config)
     config.output_attentions = True
     self.roberta = RobertaModel(config)
     self._dropout = nn.Dropout(config.hidden_dropout_prob)
     self._classification_layer = nn.Linear(config.hidden_size, 1)
     self._key_components_detection_layer = nn.Linear(
         3 * config.hidden_size, 1)
     self._attention_layer = 13
     self._attention_head = 4
     self.apply(self.init_weights)
Example #14
def test_data(args):
	# result_dir = "../results/"+args['in_dir'].split("/")[-2]+"-"+args['model_dir'].split("/")[-2]
	result_dir = args["save_dir"]+args["save_folder"]
	

	if args['embed_size'] == 768:
		model = RobertaModel.from_pretrained('roberta-base').cuda()
	else:
		model = RobertaModel.from_pretrained('roberta-large').cuda()

	if args['parallel']:
		model = nn.DataParallel(model)
		
	classifier = FeedForward(args['embed_size'],int(args['embed_size']/2),args['nooflabels']).cuda()

	checkpoint = torch.load(args['model_dir']+args['model_name'])
	model.load_state_dict(checkpoint['model_state_dict'])
	classifier.load_state_dict(checkpoint['classifier_state_dict'])

	

	for split in args["eval_splits"]:
		try:
			data_file = open(args['in_dir']+split+".pkl",'rb')
			data = pickle.load(data_file)
			# print(len(data['encodings']))
			acc,gold,pred = test(model,classifier,data)
			print("{} accuracy: {}".format(split, acc))
			
			results = {"accuracy": acc,
					"gold": gold, 
					"pred": pred}

			if args['save_enable']!=0:
				if not os.path.isdir(result_dir):
					os.mkdir(result_dir)
				with open(result_dir+"/predict_"+split+".json", 'w') as fp:
					json.dump(results, fp)

		except FileNotFoundError:
			print("{}.pkl file doesn't exist".format(split))
Example #15
    def test_inference_no_head(self):
        model = RobertaModel.from_pretrained('roberta-base')

        input_ids = torch.tensor(
            [[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
        output = model(input_ids)[0]
        # compare the actual values for a slice.
        expected_slice = torch.Tensor([[[-0.0231, 0.0782, 0.0074],
                                        [-0.1854, 0.0539, -0.0174],
                                        [0.0548, 0.0799, 0.1687]]])
        self.assertTrue(
            torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
Example #16
    def __init__(self, opt):
        self.opt = opt

        if 'aen_simple' == opt.model_name:
            if 'bert' == opt.bert_type:
                tokenizer = Tokenizer4Bert(opt.max_seq_len,
                                           opt.pretrained_bert_name)
                bert = BertModel.from_pretrained(opt.pretrained_bert_name)
                self.model = opt.model_class(bert, opt).to(opt.device)
            elif 'roberta' == opt.bert_type:
                tokenizer = Tokenizer4RoBerta(opt.max_seq_len,
                                              opt.pretrained_bert_name)
                roberta = RobertaModel.from_pretrained(
                    opt.pretrained_bert_name)
                self.model = opt.model_class(roberta, opt).to(opt.device)
        elif 'roberta' in opt.model_name:
            tokenizer = Tokenizer4RoBerta(opt.max_seq_len,
                                          opt.pretrained_bert_name)
            roberta = RobertaModel.from_pretrained(opt.pretrained_bert_name)
            self.model = opt.model_class(roberta, opt).to(opt.device)
        elif 'bert' in opt.model_name:
            tokenizer = Tokenizer4Bert(opt.max_seq_len,
                                       opt.pretrained_bert_name)
            bert = BertModel.from_pretrained(opt.pretrained_bert_name)
            self.model = opt.model_class(bert, opt).to(opt.device)

        self.trainset = ABSADataset(opt.dataset_file['train'], tokenizer)
        self.testset = ABSADataset(opt.dataset_file['test'], tokenizer)
        assert 0 <= opt.valset_ratio < 1
        if opt.valset_ratio > 0:
            valset_len = int(len(self.trainset) * opt.valset_ratio)
            self.trainset, self.valset = random_split(
                self.trainset, (len(self.trainset) - valset_len, valset_len))
        else:
            self.valset = self.testset

        if opt.device.type == 'cuda':
            logger.info('cuda memory allocated: {}'.format(
                torch.cuda.memory_allocated(device=opt.device.index)))
        self._print_args()
Example #17
    def __init__(
        self,
        base_model_path,
        base_model_name,
        is_custom_pretrained,
        base_model_feature_size,
        additional_feature_size,
        num_classes,
        rnn_dimension,
        linear_1_dimension,
    ):
        super(MatchArchitecture, self).__init__()
        if not is_custom_pretrained:
            self.base_model = RobertaModel.from_pretrained(base_model_name)
        else:
            self.base_model = RobertaModel.from_pretrained(base_model_path)

        for param in self.base_model.parameters():
            param.requires_grad = False

        self.match_head = MatchHead(base_model_feature_size,
                                    additional_feature_size, num_classes,
                                    rnn_dimension, linear_1_dimension)
Example #18
 def __init__(self, model):
     super().__init__()
     if 'roberta' in model:
         print("Roberta model: {}".format(model))
         self.tokenizer = RobertaTokenizer.from_pretrained(model)
         self.bert = RobertaModel.from_pretrained(model)
     else:
         print("Bert model: {}".format(model))
         self.tokenizer = BertTokenizer.from_pretrained(model)
         self.bert = BertModel.from_pretrained(model)
     self.dim = self.bert.pooler.dense.in_features
     self.max_len = self.bert.embeddings.position_embeddings.num_embeddings
     
     if use_cuda:
         self.cuda()
Example #19
    def __init__(self, opt):
        self.opt = opt
        if 'roberta' in opt.pretrained_bert_name:
            tokenizer = RobertaTokenizer.from_pretrained(
                opt.pretrained_bert_name)
            transformer = RobertaModel.from_pretrained(
                opt.pretrained_bert_name, output_attentions=True)
        elif 'bert' in opt.pretrained_bert_name:
            tokenizer = BertTokenizer.from_pretrained(opt.pretrained_bert_name)
            transformer = BertModel.from_pretrained(opt.pretrained_bert_name,
                                                    output_attentions=True)
        elif 'xlnet' in opt.pretrained_bert_name:
            tokenizer = XLNetTokenizer.from_pretrained(
                opt.pretrained_bert_name)
            transformer = XLNetModel.from_pretrained(opt.pretrained_bert_name,
                                                     output_attentions=True)
        if 'bert' in opt.model_name or 'xlnet' in opt.model_name:
            tokenizer = Tokenizer4Pretrain(tokenizer, opt.max_seq_len)
            self.model = opt.model_class(transformer, opt).to(opt.device)
        # elif 'xlnet' in opt.model_name:
        #     tokenizer = Tokenizer4Pretrain(tokenizer, opt.max_seq_len)
        #     self.model = opt.model_class(bert,opt).to(opt.device)
        else:
            tokenizer = build_tokenizer(
                fnames=[opt.dataset_file['train'], opt.dataset_file['test']],
                max_seq_len=opt.max_seq_len,
                dat_fname='{0}_tokenizer.dat'.format(opt.dataset))
            embedding_matrix = build_embedding_matrix(
                word2idx=tokenizer.word2idx,
                embed_dim=opt.embed_dim,
                dat_fname='{0}_{1}_embedding_matrix.dat'.format(
                    str(opt.embed_dim), opt.dataset))
            self.model = opt.model_class(embedding_matrix, opt).to(opt.device)

        self.trainset = ABSADataset(opt.dataset_file['train'], tokenizer)
        self.testset = ABSADataset(opt.dataset_file['test'], tokenizer)
        assert 0 <= opt.valset_ratio < 1
        if opt.valset_ratio > 0:
            valset_len = int(len(self.trainset) * opt.valset_ratio)
            self.trainset, self.valset = random_split(
                self.trainset, (len(self.trainset) - valset_len, valset_len))
        else:
            self.valset = self.testset

        if opt.device.type == 'cuda':
            logger.info('cuda memory allocated: {}'.format(
                torch.cuda.memory_allocated(device=opt.device.index)))
        self._print_args()
Example #20
    def __init__(self,
                 token_makers,
                 lang_code="en",
                 pretrained_model_name=None,
                 answer_maxlen=30):
        super(RoBertaForQA, self).__init__(token_makers)

        self.lang_code = lang_code
        self.use_pytorch_transformers = True  # for optimizer's model parameters
        self.answer_maxlen = answer_maxlen

        self.model = RobertaModel.from_pretrained(pretrained_model_name,
                                                  cache_dir=str(
                                                      CachePath.ROOT))
        self.qa_outputs = nn.Linear(self.model.config.hidden_size,
                                    self.model.config.num_labels)
        self.criterion = nn.CrossEntropyLoss()
Example #21
    def __init__(self, token_makers, pretrained_model_name=None, dropout=0.2):

        super(RobertaForRegression, self).__init__(token_makers)

        self.use_pytorch_transformers = True  # for optimizer's model parameters

        NUM_CLASSES = 1

        self._model = RobertaModel.from_pretrained(pretrained_model_name,
                                                   cache_dir=str(
                                                       CachePath.ROOT))
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(self._model.config.hidden_size, NUM_CLASSES))
        self.classifier.apply(self._model.init_weights)

        self.criterion = nn.MSELoss()
Example #22
    def __init__(self, token_makers, num_classes, pretrained_model_name=None, dropout=0.2):

        super(RobertaForSeqCls, self).__init__(token_makers)

        self.use_pytorch_transformers = True  # for optimizer's model parameters

        self.num_classes = num_classes

        self._model = RobertaModel.from_pretrained(
            pretrained_model_name, cache_dir=str(CachePath.ROOT)
        )
        self.classifier = nn.Sequential(
            nn.Linear(self._model.config.hidden_size, self._model.config.hidden_size),
            nn.Dropout(dropout),
            nn.Linear(self._model.config.hidden_size, num_classes)
        )
        self.classifier.apply(self._model.init_weights)

        self.criterion = nn.CrossEntropyLoss()
Example #23
    def __init__(self,
                 model_name_or_path: str,
                 max_seq_length: int = 128,
                 do_lower_case: bool = True):
        super(RoBERTa, self).__init__()
        self.config_keys = ['max_seq_length', 'do_lower_case']
        self.do_lower_case = do_lower_case

        if max_seq_length > 510:
            logging.warning(
                "RoBERTa only allows a max_seq_length of 510 (512 with special tokens). Value will be set to 510"
            )
            max_seq_length = 510
        self.max_seq_length = max_seq_length

        self.roberta = RobertaModel.from_pretrained(model_name_or_path)
        self.tokenizer = RobertaTokenizer.from_pretrained(
            model_name_or_path, do_lower_case=do_lower_case)
        self.cls_token_id = self.tokenizer.convert_tokens_to_ids(
            [self.tokenizer.cls_token])[0]
        self.sep_token_id = self.tokenizer.convert_tokens_to_ids(
            [self.tokenizer.sep_token])[0]
Example #24
def test_roberta_embeddings():
    roberta_model: str = "roberta-base"

    tokenizer = RobertaTokenizer.from_pretrained(roberta_model)
    model = RobertaModel.from_pretrained(
        pretrained_model_name_or_path=roberta_model, output_hidden_states=True
    )
    model.to(flair.device)
    model.eval()

    s: str = "Berlin and Munich have a lot of puppeteer to see ."

    with torch.no_grad():
        tokens = tokenizer.tokenize("<s> " + s + " </s>")

        indexed_tokens = tokenizer.convert_tokens_to_ids(tokens)
        tokens_tensor = torch.tensor([indexed_tokens])
        tokens_tensor = tokens_tensor.to(flair.device)

        hidden_states = model(tokens_tensor)[-1]

        first_layer = hidden_states[1][0]

    assert len(first_layer) == len(tokens)

    #         0           1      2       3        4         5       6      7      8       9      10     11     12     13    14      15
    #
    #       '<s>',      'Ber', 'lin', 'Ġand', 'ĠMunich', 'Ġhave', 'Ġa', 'Ġlot', 'Ġof', 'Ġpupp', 'ete', 'er', 'Ġto', 'Ġsee', 'Ġ.',  '</s>'
    #                      \     /       |        |         |       |      |      |         \      |      /     |      |      |
    #                       Berlin      and    Munich     have      a     lot     of           puppeteer        to    see     .
    #
    #                         0          1        2         3       4      5       6               7             8     9      10

    def embed_sentence(
        sentence: str,
        pooling_operation,
        layers: str = "1",
        use_scalar_mix: bool = False,
    ) -> Sentence:
        embeddings = RoBERTaEmbeddings(
            pretrained_model_name_or_path=roberta_model,
            layers=layers,
            pooling_operation=pooling_operation,
            use_scalar_mix=use_scalar_mix,
        )
        flair_sentence = Sentence(sentence)
        embeddings.embed(flair_sentence)

        return flair_sentence

    # First subword embedding
    sentence_first_subword = embed_sentence(sentence=s, pooling_operation="first")

    first_token_embedding_ref = first_layer[1].tolist()
    first_token_embedding_actual = sentence_first_subword.tokens[0].embedding.tolist()

    puppeteer_first_subword_embedding_ref = first_layer[9].tolist()
    puppeteer_first_subword_embedding_actual = sentence_first_subword.tokens[
        7
    ].embedding.tolist()

    assert first_token_embedding_ref == first_token_embedding_actual
    assert (
        puppeteer_first_subword_embedding_ref
        == puppeteer_first_subword_embedding_actual
    )

    # Last subword embedding
    sentence_last_subword = embed_sentence(sentence=s, pooling_operation="last")

    # The first token is split into two subwords.
    # As we use "last" as the pooling operation, we take the last subword as the "first token" here
    first_token_embedding_ref = first_layer[2].tolist()
    first_token_embedding_actual = sentence_last_subword.tokens[0].embedding.tolist()

    puppeteer_last_subword_embedding_ref = first_layer[11].tolist()
    puppeteer_last_subword_embedding_actual = sentence_last_subword.tokens[
        7
    ].embedding.tolist()

    assert first_token_embedding_ref == first_token_embedding_actual
    assert (
        puppeteer_last_subword_embedding_ref == puppeteer_last_subword_embedding_actual
    )

    # First and last subword embedding
    sentence_first_last_subword = embed_sentence(
        sentence=s, pooling_operation="first_last"
    )

    first_token_embedding_ref = torch.cat([first_layer[1], first_layer[2]]).tolist()
    first_token_embedding_actual = sentence_first_last_subword.tokens[
        0
    ].embedding.tolist()

    puppeteer_first_last_subword_embedding_ref = torch.cat(
        [first_layer[9], first_layer[11]]
    ).tolist()
    puppeteer_first_last_subword_embedding_actual = sentence_first_last_subword.tokens[
        7
    ].embedding.tolist()

    assert first_token_embedding_ref == first_token_embedding_actual
    assert (
        puppeteer_first_last_subword_embedding_ref
        == puppeteer_first_last_subword_embedding_actual
    )

    # Mean of all subword embeddings
    sentence_mean_subword = embed_sentence(sentence=s, pooling_operation="mean")

    first_token_embedding_ref = calculate_mean_embedding(
        [first_layer[1], first_layer[2]]
    ).tolist()
    first_token_embedding_actual = sentence_mean_subword.tokens[0].embedding.tolist()

    puppeteer_mean_subword_embedding_ref = calculate_mean_embedding(
        [first_layer[9], first_layer[10], first_layer[11]]
    ).tolist()
    puppeteer_mean_subword_embedding_actual = sentence_mean_subword.tokens[
        7
    ].embedding.tolist()

    assert first_token_embedding_ref == first_token_embedding_actual
    assert (
        puppeteer_mean_subword_embedding_ref == puppeteer_mean_subword_embedding_actual
    )

    # Check embedding dimension when using multiple layers
    sentence_mult_layers = embed_sentence(
        sentence="Munich", pooling_operation="first", layers="1,2,3,4"
    )

    ref_embedding_size = 4 * 768
    actual_embedding_size = len(sentence_mult_layers.tokens[0].embedding)

    assert ref_embedding_size == actual_embedding_size

    # Check embedding dimension when using multiple layers and scalar mix
    sentence_mult_layers_scalar_mix = embed_sentence(
        sentence="Berlin",
        pooling_operation="first",
        layers="1,2,3,4",
        use_scalar_mix=True,
    )

    ref_embedding_size = 1 * 768
    actual_embedding_size = len(sentence_mult_layers_scalar_mix.tokens[0].embedding)

    assert ref_embedding_size == actual_embedding_size
Example #25
import argparse

import torch

from mspan_roberta_gcn.drop_roberta_dataset import DropReader
from pytorch_transformers import RobertaTokenizer, RobertaModel, RobertaConfig


parser = argparse.ArgumentParser("Bert inference task.")
options.add_bert_args(parser)
options.add_model_args(parser)
options.add_inference_args(parser)

args = parser.parse_args()

args.cuda = torch.cuda.device_count() > 0


print("Build bert model.")
bert_model = RobertaModel(RobertaConfig.from_pretrained(args.roberta_model))
print("Build Drop model.")
network = NumericallyAugmentedBertNet(bert_model,
                hidden_size=bert_model.config.hidden_size,
                dropout_prob=0.0,
                use_gcn=args.use_gcn,
                gcn_steps=args.gcn_steps)

if args.cuda: network.cuda()
print("Load from pre path {}.".format(args.pre_path))
network.load_state_dict(torch.load(args.pre_path))

print("Load data from {}.".format(args.inf_path))
tokenizer = RobertaTokenizer.from_pretrained(args.roberta_model)
inf_iter = DropBatchGen(args, tokenizer, DropReader(tokenizer, passage_length_limit=463, question_length_limit=46)._read(args.inf_path))
Example #26
parser = argparse.ArgumentParser("Bert inference task.")
options.add_bert_args(parser)
options.add_model_args(parser)
options.add_inference_args(parser)
parser.add_argument("--eng", type=int, required=False)

args = parser.parse_args()

args.cuda = torch.cuda.device_count() > 0


print("Build bert model.")
if args.eng == 0:
    bert_model = BertModel.from_pretrained(args.roberta_model)
else:
    bert_model = RobertaModel.from_pretrained(args.roberta_model)
print("Build Drop model.")
if args.tag_mspan:
    network = TNumericallyAugmentedBertNet(bert_model,
                                          hidden_size=bert_model.config.hidden_size,
                                          dropout_prob=0.0,
                                          use_gcn=args.use_gcn,
                                          gcn_steps=args.gcn_steps,
                                          is_eng=args.eng)
else:
    network = NumericallyAugmentedBertNet(bert_model,
                hidden_size=bert_model.config.hidden_size,
                dropout_prob=0.0,
                use_gcn=args.use_gcn,
                gcn_steps=args.gcn_steps)
Example #27
    crf = ConditionalRandomField(len(roles_to_idx),
                                 None,
                                 include_start_end_transitions=True)
    print(crf)

    model_parameters = filter(lambda p: p.requires_grad,
                              chain(srl.parameters(), crf.parameters()))

    num_params = sum([np.prod(p.size()) for p in model_parameters])
    print("Total parameters =", num_params)
    print(params)

    if params.use_bert:
        bert_tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
        bert_model = RobertaModel.from_pretrained("roberta-base",
                                                  output_hidden_states=True)
        if params.gpu_id > -1:
            bert_model.cuda()
    else:
        bert_tokenizer = None
        bert_model = None
    if params.gpu_id > -1:
        srl.cuda()
        crf.cuda()

    srl.load_state_dict(torch.load(os.path.join(params.dir, params.modelname)))

    crf.load_state_dict(
        torch.load(os.path.join(params.dir, params.modelname + "crf")))

    evaluate(
Example #28
 def __init__(self):
     super(RobertaEncoder, self).__init__()
     self.encoder = RobertaModel.from_pretrained('roberta-base')
Example #29
 def __init__(self, config):
     super(RoBertaMCQConcat, self).__init__(config)
     self.roberta = RobertaModel(config)
     self._dropout = nn.Dropout(config.hidden_dropout_prob)
     self._classification_layer = nn.Linear(config.hidden_size, 1)
     self.apply(self.init_weights)
Example #30
    def create_model(self, only_model=False):
        logger.info("creating model {}".format(self.opt.model_name))

        if self.opt.model_name in [
                "aen_bert",
                "aen_distilbert",
                "aen_roberta",
                "aen_distilroberta",
                "spc_distilbert",
                "spc_bert",
                "spc_roberta",
                "lcf_bert",
                "fx_bert",
        ]:
            if not only_model:
                if self.opt.model_name in [
                        "aen_bert",
                        "spc_bert",
                        "lcf_bert",
                        "fx_bert",
                ]:
                    self.tokenizer = Tokenizer4Bert(
                        self.opt.pretrained_model_name,
                        self.opt.max_seq_len,
                        self.opt.global_context_seqs_per_doc,
                    )
                elif self.opt.model_name in [
                        "aen_distilbert", "spc_distilbert"
                ]:
                    self.tokenizer = Tokenizer4Distilbert(
                        self.opt.pretrained_model_name,
                        self.opt.max_seq_len,
                    )
                elif self.opt.model_name in ["aen_roberta", "spc_roberta"]:
                    self.tokenizer = Tokenizer4Roberta(
                        self.opt.pretrained_model_name,
                        self.opt.max_seq_len,
                    )
                elif self.opt.model_name in [
                        "aen_distilroberta", "spc_distiloberta"
                ]:
                    self.tokenizer = Tokenizer4Roberta(
                        self.opt.pretrained_model_name,
                        self.opt.max_seq_len,
                    )

            if not os.path.isdir(self.opt.pretrained_model_name):
                pretrained_model = torch.hub.load(
                    'huggingface/transformers', 'model',
                    self.opt.pretrained_model_name)
            elif self.opt.model_name in [
                    "aen_bert", "spc_bert", "lcf_bert", "fx_bert"
            ]:
                pretrained_model = BertModel.from_pretrained(
                    self.opt.pretrained_model_name, output_hidden_states=True)
            elif self.opt.model_name in ["aen_distilbert", "spc_distilbert"]:
                pretrained_model = DistilBertModel.from_pretrained(
                    self.opt.pretrained_model_name, output_hidden_states=True)
            elif self.opt.model_name in ["aen_roberta", "spc_roberta"]:
                pretrained_model = RobertaModel.from_pretrained(
                    self.opt.pretrained_model_name, output_hidden_states=True)

            if self.opt.state_dict == "pretrained":
                try:
                    self.model = self.opt.model_class(
                        pretrained_model,
                        self.opt,
                        pretrained=self.opt.state_dict == "pretrained",
                        map_location=self.opt.device).to(self.opt.device)
                except TypeError as e:
                    logger.error(
                        "The selected model does not support the 'pretrained'-keyword for state_dict"
                    )
                    exit(1)
            else:
                self.model = self.opt.model_class(pretrained_model,
                                                  self.opt).to(self.opt.device)

            if self.opt.state_dict and self.opt.state_dict != "pretrained":
                # load weights from the state_dict
                logger.info(f"loading weights from {self.opt.state_dict}")
                self.model.load_state_dict(
                    torch.load(self.opt.state_dict,
                               map_location=self.opt.device))

        elif self.opt.model_name in ["aen_glove", "ram"]:
            if not only_model:
                self.tokenizer = Tokenizer4GloVe(self.opt.max_seq_len)

            if self.opt.model_name == "aen_glove":
                self.model = self.opt.model_class(
                    self.tokenizer.embedding_matrix,
                    self.opt).to(self.opt.device)
            elif self.opt.model_name == "ram":
                self.model = self.opt.model_class(self.opt).to(self.opt.device)

        else:
            raise Exception("model_name unknown: {}".format(
                self.opt.model_name))