def test_inference_no_head(self):
    """Compare the headless model's output against reference values.

    Loads ``google/mobilebert-uncased``, runs one fixed token sequence
    through it without gradient tracking, and checks both the shape of the
    first output and its leading 3x3 slice.
    """
    model = MobileBertModel.from_pretrained("google/mobilebert-uncased")
    model = model.to(torch_device)
    input_ids = _long_tensor(
        [[101, 7110, 1005, 1056, 2023, 11333, 17413, 1029, 102]]
    )

    with torch.no_grad():
        output = model(input_ids)[0]

    self.assertEqual(output.shape, torch.Size((1, 9, 512)))

    expected_slice = torch.tensor(
        [
            [
                [-2.4736526e07, 8.2691656e04, 1.6521838e05],
                [-5.7541704e-01, 3.9056022e00, 4.4011507e00],
                [2.6047359e00, 1.5677652e00, -1.7324188e-01],
            ]
        ],
        device=torch_device,
    )

    # The reference values span many orders of magnitude, so an absolute
    # (difference-based) tolerance would be dominated by the largest
    # entries. The check is relative instead: every element of
    # expected / actual must lie within [1 - TOLERANCE, 1 + TOLERANCE].
    ratio = expected_slice / output[..., :3, :3]
    within_bounds = (
        torch.all(ratio >= 1 - TOLERANCE)
        and torch.all(ratio <= 1 + TOLERANCE)
    )
    self.assertTrue(within_bounds)
def __init__(self, config, project_dim: int = 0, ctx_bottleneck: bool = False):
    """Set up the MobileBERT base model plus optional projection modules.

    Args:
        config: Model configuration forwarded to ``MobileBertModel.__init__``;
            its ``hidden_size`` must be positive.
        project_dim: When non-zero, ``encode_proj`` is a linear layer from
            ``hidden_size`` to ``project_dim``; otherwise it is ``None``.
        ctx_bottleneck: When true, ``decode_proj`` is a Tanh/Linear stack
            going from ``project_dim`` through the midpoint width back to
            ``hidden_size``; otherwise it is ``None``.
    """
    MobileBertModel.__init__(self, config)

    hidden_size = config.hidden_size
    assert hidden_size > 0, 'Encoder hidden_size can\'t be zero'

    if project_dim != 0:
        self.encode_proj = nn.Linear(hidden_size, project_dim)
    else:
        self.encode_proj = None

    if ctx_bottleneck:
        # Intermediate width halfway between the projected and hidden sizes.
        mid_dim = (hidden_size + project_dim) // 2
        self.decode_proj = nn.Sequential(
            nn.Tanh(),
            nn.Linear(project_dim, mid_dim),
            nn.Tanh(),
            nn.Linear(mid_dim, hidden_size),
        )
    else:
        self.decode_proj = None

    self.init_weights()
def __init__(self, num_labels=17):
    """Build a pretrained MobileBERT encoder with a linear label head.

    Args:
        num_labels: Output width of the classifier layer; also forwarded
            to ``MobileBertModel.from_pretrained``.
    """
    self.num_labels = num_labels
    super(MobileBertForMultiLabelSequenceClassification, self).__init__()

    checkpoint = 'google/mobilebert-uncased'
    encoder = MobileBertModel.from_pretrained(
        checkpoint,
        hidden_act="gelu",
        num_labels=num_labels,
    )
    self.bert = encoder

    dropout_prob = 0.1
    self.dropout = torch.nn.Dropout(dropout_prob)

    # The classifier input width is fixed at 512 features.
    encoder_width = 512
    self.classifier = torch.nn.Linear(encoder_width, num_labels)
def create_and_check_mobilebert_model_as_decoder(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
    encoder_hidden_states,
    encoder_attention_mask,
):
    """Run the model with and without encoder inputs and check output shapes.

    The model is called three times with progressively fewer encoder
    arguments; only the outputs of the last call (no encoder inputs) are
    checked against the tester's batch, sequence and hidden sizes.
    """
    model = MobileBertModel(config)
    model.to(torch_device)
    model.eval()

    shared_kwargs = {
        "attention_mask": input_mask,
        "token_type_ids": token_type_ids,
    }

    # The first two calls only need to complete; their outputs are discarded.
    model(
        input_ids,
        encoder_hidden_states=encoder_hidden_states,
        encoder_attention_mask=encoder_attention_mask,
        **shared_kwargs,
    )
    model(
        input_ids,
        encoder_hidden_states=encoder_hidden_states,
        **shared_kwargs,
    )
    outputs = model(input_ids, **shared_kwargs)

    expected_sequence_shape = (self.batch_size, self.seq_length, self.hidden_size)
    expected_pooled_shape = (self.batch_size, self.hidden_size)
    self.parent.assertEqual(outputs.last_hidden_state.shape, expected_sequence_shape)
    self.parent.assertEqual(outputs.pooler_output.shape, expected_pooled_shape)
def __init__(self, config, num_labels=17, mobilebert=True):
    """Build a multi-label classifier on a BERT or pretrained MobileBERT encoder.

    Args:
        config: Configuration passed to the parent constructor and, on the
            plain-BERT path, to ``BertModel``. Its ``hidden_dropout_prob``
            sets the dropout rate on both paths.
        num_labels: Output width of the classifier layer.
        mobilebert: When true, the encoder is loaded from the
            ``google/mobilebert-uncased`` checkpoint; otherwise a fresh
            ``BertModel(config)`` is created and ``init_bert_weights`` is
            applied to the whole module.
    """
    self.mobilebert = mobilebert
    # Both arms of the former if/else made this identical call.
    super(BertForMultiLabelSequenceClassification, self).__init__(config)
    self.num_labels = num_labels

    if mobilebert:
        self.bert = MobileBertModel.from_pretrained(
            'google/mobilebert-uncased',
            num_labels=num_labels,
        )
        # Size the head from the loaded encoder's own config: the checkpoint
        # fixes its hidden size, which need not equal ``config.hidden_size``.
        hidden_size = self.bert.config.hidden_size
    else:
        self.bert = BertModel(config)
        hidden_size = config.hidden_size

    self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
    self.classifier = torch.nn.Linear(hidden_size, num_labels)

    if not mobilebert:
        # Runs only on the plain-BERT path, after every submodule exists;
        # presumably skipped for MobileBERT to keep the pretrained weights.
        self.apply(self.init_bert_weights)
def Net():
    """Return a ``BERTGRUSentiment`` model whose ``bert*`` parameters are frozen.

    The encoder is loaded from the ``google/mobilebert-uncased`` checkpoint
    and handed to ``BERTGRUSentiment`` together with fixed hyperparameters.
    Every parameter whose name starts with ``bert`` has gradients disabled.
    """
    encoder = MobileBertModel.from_pretrained('google/mobilebert-uncased')

    hidden_dim = 256
    output_dim = 1
    n_layers = 2
    bidirectional = True
    dropout = 0.25

    model = BERTGRUSentiment(
        encoder,
        hidden_dim,
        output_dim,
        n_layers,
        bidirectional,
        dropout,
    )

    for param_name, param in model.named_parameters():
        if not param_name.startswith('bert'):
            continue
        param.requires_grad = False

    return model
def create_and_check_mobilebert_model(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
):
    """Call the base model with decreasing optional inputs and check shapes.

    Three forward passes are made (with mask and token types, with token
    types only, with ids only); the outputs of the last one are compared
    against the tester's batch, sequence and hidden sizes.
    """
    model = MobileBertModel(config=config)
    model.to(torch_device)
    model.eval()

    # The first two passes only need to complete; their outputs are discarded.
    model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
    model(input_ids, token_type_ids=token_type_ids)
    outputs = model(input_ids)

    sequence_dims = list(outputs["last_hidden_state"].size())
    pooled_dims = list(outputs["pooler_output"].size())

    self.parent.assertListEqual(
        sequence_dims,
        [self.batch_size, self.seq_length, self.hidden_size],
    )
    self.parent.assertListEqual(
        pooled_dims,
        [self.batch_size, self.hidden_size],
    )
def create_and_check_mobilebert_model_as_decoder(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
    encoder_hidden_states,
    encoder_attention_mask,
):
    """Run the model with and without encoder inputs and check output sizes.

    Each call's return value is unpacked into exactly two items. Only the
    pair from the final call (no encoder inputs) is checked against the
    tester's batch, sequence and hidden sizes.
    """
    model = MobileBertModel(config)
    model.to(torch_device)
    model.eval()

    shared_kwargs = {
        "attention_mask": input_mask,
        "token_type_ids": token_type_ids,
    }

    # Two-way unpacking is kept on every call so that a return value with a
    # different number of items still fails here, as in the original.
    _, _ = model(
        input_ids,
        encoder_hidden_states=encoder_hidden_states,
        encoder_attention_mask=encoder_attention_mask,
        **shared_kwargs,
    )
    _, _ = model(
        input_ids,
        encoder_hidden_states=encoder_hidden_states,
        **shared_kwargs,
    )
    sequence_output, pooled_output = model(input_ids, **shared_kwargs)

    self.parent.assertListEqual(
        list(sequence_output.size()),
        [self.batch_size, self.seq_length, self.hidden_size],
    )
    self.parent.assertListEqual(
        list(pooled_output.size()),
        [self.batch_size, self.hidden_size],
    )
def test_model_from_pretrained(self):
    """Check that the first listed pretrained checkpoint loads to a model."""
    # Only the first archive entry is exercised.
    first_entry_only = MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]
    for checkpoint_name in first_entry_only:
        loaded = MobileBertModel.from_pretrained(checkpoint_name)
        self.assertIsNotNone(loaded)