def get_transformers_model(
    settings: Dict[str, Any],
    model_name: str,
    pretrained: bool = True,
    ckptdir: Optional[Path] = None,
) -> PreTrainedModel:
    """Build a Hugging Face encoder, with or without pretrained weights.

    The configuration is read from ``model_name`` when ``pretrained`` is true
    and from ``ckptdir`` otherwise. Dropout rates and the layer-norm epsilon
    are then overridden from ``settings`` before the model is created.

    Args:
        settings: Option mapping. The keys ``encoder_attn_dropout_rate``
            (default 0.1), ``encoder_ffn_dropout_rate`` (default 0.1) and
            ``layer_norm_eps`` (default 1e-5) are read from it.
        model_name: Name or path handed to ``from_pretrained`` when
            ``pretrained`` is true.
        pretrained: If true, load the pretrained weights of ``model_name``.
            If false, only instantiate the architecture described by the
            configuration found in ``ckptdir``.
        ckptdir: Directory containing the saved configuration. Required when
            ``pretrained`` is false.

    Returns:
        The instantiated model.

    Raises:
        ValueError: If ``pretrained`` is false and ``ckptdir`` is ``None``.
    """
    # Fail early with a clear message instead of letting str(None) turn into
    # the bogus path "None" further down.
    if not pretrained and ckptdir is None:
        raise ValueError(
            "ckptdir must be provided when pretrained is False"
        )

    model_path = model_name if pretrained else str(ckptdir)
    config = AutoConfig.from_pretrained(model_path)
    config.attention_probs_dropout_prob = settings.get(
        'encoder_attn_dropout_rate', 0.1)
    config.hidden_dropout_prob = settings.get('encoder_ffn_dropout_rate', 0.1)
    config.layer_norm_eps = settings.get('layer_norm_eps', 1e-5)

    if pretrained:
        return AutoModel.from_pretrained(model_name, config=config)

    # Structure only, no weights. Let the configuration class choose the
    # architecture: matching substrings of the model name misroutes names
    # such as 'xlm-roberta-*' (hit 'xlm' first) or 'deberta-v3-*' (missed
    # 'deberta-v2'), producing a class that differs from the one the
    # pretrained path builds for the same configuration.
    try:
        return AutoModel.from_config(config)
    except ValueError:
        # Configuration class unknown to AutoModel: keep the historical
        # default of falling back to a BERT encoder.
        return BertModel(config=config)
def test_inference_no_head(self):
    """Check a slice of the base model's output against reference values.

    Loads ``microsoft/deberta-v2-xlarge``, runs one forward pass with an
    attention mask whose first position is zero, and compares rows/columns
    1..3 of the first output element with hard-coded expected numbers.
    """
    model = DebertaV2Model.from_pretrained("microsoft/deberta-v2-xlarge")

    token_ids = torch.tensor(
        [[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]
    )
    mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

    first_output = model(token_ids, attention_mask=mask)[0]

    # Only a small 3x3 window of the output is pinned to reference values.
    reference = torch.tensor(
        [[[0.2356, 0.1948, 0.0369], [-0.1063, 0.3586, -0.5152], [-0.6399, -0.0259, -0.2525]]]
    )
    window = first_output[:, 1:4, 1:4]
    self.assertTrue(torch.allclose(window, reference, atol=1e-4), f"{window}")
def create_and_check_deberta_model(
    self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
    """Run the base model with varying optional inputs and check output size.

    The model is built from ``config``, moved to ``torch_device`` and put in
    eval mode. It is called three times (with mask and token types, with
    token types only, with ids only); the size of the last call's first
    output must equal ``[batch_size, seq_length, hidden_size]``.
    The label arguments are accepted but not used here.
    """
    model = DebertaV2Model(config=config)
    model.to(torch_device)
    model.eval()

    # The first two calls only need to run; their outputs are not inspected.
    model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)[0]
    model(input_ids, token_type_ids=token_type_ids)[0]
    hidden = model(input_ids)[0]

    actual_size = list(hidden.size())
    expected_size = [self.batch_size, self.seq_length, self.hidden_size]
    self.parent.assertListEqual(actual_size, expected_size)
def test_inference_no_head(self):
    """Check the top-left output slice of the base model with fixed seeds.

    Seeds Python's ``random``, NumPy and PyTorch (including all CUDA
    devices) with 0, loads ``microsoft/deberta-v2-xlarge``, runs it on a
    fixed token sequence and compares the first 3x3 window of the first
    output element with hard-coded expected numbers.
    """
    # Seed every random number generator in the same order before loading.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(0)

    model = DebertaV2Model.from_pretrained("microsoft/deberta-v2-xlarge")
    token_ids = torch.tensor(
        [[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]
    )
    first_output = model(token_ids)[0]

    # Only a small 3x3 window of the output is pinned to reference values.
    reference = torch.tensor(
        [[[-0.2913, 0.2647, 0.5627], [-0.4318, 0.1389, 0.3881], [-0.2929, -0.2489, 0.3452]]]
    )
    window = first_output[:, :3, :3]
    self.assertTrue(torch.allclose(window, reference, atol=1e-4), f"{window}")
def test_model_from_pretrained(self):
    """Load the first archived checkpoint and check a model object results."""
    # Only the first entry of the archive list is exercised.
    first_checkpoints = DEBERTA_V2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]
    for checkpoint in first_checkpoints:
        loaded = DebertaV2Model.from_pretrained(checkpoint)
        self.assertIsNotNone(loaded)