def test_inference_no_head(self):
    """Check the TFRobertaModel forward pass against known-good output values.

    The SOURCE line was truncated mid-expression (it ended at
    ``expected_slice = tf.constant(``); the expected values below are restored
    from the intact duplicate of this test elsewhere in the file.
    """
    model = TFRobertaModel.from_pretrained("roberta-base")
    input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
    output = model(input_ids)[0]
    # compare the actual values for a slice.
    expected_slice = tf.constant(
        [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0540, -0.0175], [0.0548, 0.0799, 0.1687]]]
    )
    self.assertTrue(numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-4))
def __init__(self, config, *inputs, **kwargs):
    """Multi-head classifier over a RoBERTa backbone with learned per-layer weights."""
    super().__init__(config, *inputs, **kwargs)
    # NOTE(review): num_labels is stored under the name ``num_layers`` — confirm
    # this is intentional and not a mix-up with config.num_hidden_layers.
    self.num_layers = config.num_labels
    self.backbone = TFRobertaModel(config, *inputs, **kwargs, name="roberta_backbone")
    self.dropout = tf.keras.layers.Dropout(0.2)
    self.dropout_multisampled = tf.keras.layers.Dropout(0.5)
    # One single-unit head per label; all heads share the layer name "classifier".
    heads = []
    for _ in range(config.num_labels):
        heads.append(
            tf.keras.layers.Dense(
                1,
                kernel_initializer=get_initializer(config.initializer_range),
                name="classifier",
            )
        )
    self.classifiers = heads
    self.concat = tf.keras.layers.Concatenate(axis=-1)
    # One trainable weight per hidden layer plus one extra entry initialized to 0.
    self.hidden_states_weights = tf.Variable(
        initial_value=[-3.0] * config.num_hidden_layers + [0.0],
        dtype='float32',
        trainable=True,
        name="hidden_state_weights",
    )
    self.softmax_act = tf.keras.layers.Softmax(axis=0)
    # Exclude the pooler's weights from training.
    self.backbone.roberta.pooler._trainable = False
def test_model_from_pretrained(self):
    """Smoke-test that the first archived pretrained checkpoint loads.

    Uses a managed temporary directory instead of the hard-coded
    ``/tmp/transformers_test/`` path: the old ``shutil.rmtree`` cleanup was
    skipped whenever ``from_pretrained`` raised, leaking the cache, and the
    fixed path could collide across concurrent test runs.
    """
    import tempfile

    for model_name in list(TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        # TemporaryDirectory guarantees cleanup even on exception.
        with tempfile.TemporaryDirectory() as cache_dir:
            model = TFRobertaModel.from_pretrained(model_name, cache_dir=cache_dir)
        self.assertIsNotNone(model)
def test_inference_no_head(self):
    """RoBERTa base forward pass must reproduce a known-good hidden-state slice."""
    model = TFRobertaModel.from_pretrained("roberta-base")
    token_ids = [0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]
    input_ids = tf.constant([token_ids])
    sequence_output = model(input_ids)[0]
    # Reference values for the top-left 3x3 slice of the output.
    expected_slice = tf.constant(
        [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0540, -0.0175], [0.0548, 0.0799, 0.1687]]]
    )
    actual = sequence_output[:, :3, :3].numpy()
    self.assertTrue(numpy.allclose(actual, expected_slice.numpy(), atol=1e-4))
def create_and_check_roberta_model(
    self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
    """Build a TFRobertaModel and verify the output shape for every call convention."""
    model = TFRobertaModel(config=config)
    # Keyword-dict call.
    sequence_output = model(
        {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
    )[0]
    # Positional-list call.
    sequence_output = model([input_ids, input_mask])[0]
    # Bare-tensor call; this last result is the one whose shape is checked.
    sequence_output = model(input_ids)[0]
    result = {"sequence_output": sequence_output.numpy()}
    self.parent.assertListEqual(
        list(result["sequence_output"].shape),
        [self.batch_size, self.seq_length, self.hidden_size],
    )
def create_and_check_roberta_model(
    self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
    """Exercise dict, list, and bare-tensor call signatures and check the output shape."""
    model = TFRobertaModel(config=config)
    dict_inputs = {
        "input_ids": input_ids,
        "attention_mask": input_mask,
        "token_type_ids": token_type_ids,
    }
    result = model(dict_inputs)
    result = model([input_ids, input_mask])
    # The bare-tensor call is the one whose output shape is asserted.
    result = model(input_ids)
    expected_shape = (self.batch_size, self.seq_length, self.hidden_size)
    self.parent.assertEqual(result.last_hidden_state.shape, expected_shape)
def __init__(self, config, *inputs, **kwargs):
    """RoBERTa backbone plus a weighted hidden-state sum feeding one classifier head."""
    super().__init__(config, *inputs, **kwargs)
    # NOTE(review): num_labels is stored under the name ``num_layers`` — confirm
    # this is intentional and not a mix-up with config.num_hidden_layers.
    self.num_layers = config.num_labels
    self.backbone = TFRobertaModel(config, *inputs, **kwargs, name="roberta_backbone")
    self.dropout = tf.keras.layers.Dropout(0.2)
    self.dropout_multisampled = tf.keras.layers.Dropout(0.5)
    self.weighted_sum = WeightedSumLayer(config.num_hidden_layers)
    self.classifier = tf.keras.layers.Dense(
        config.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name="classifier",
    )
    # Exclude the pooler's weights from training.
    self.backbone.roberta.pooler._trainable = False
def test_model_from_pretrained(self):
    """Loading the first archived checkpoint should yield a non-None model."""
    archive_names = list(TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())
    for model_name in archive_names[:1]:
        model = TFRobertaModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
        self.assertIsNotNone(model)
def test_model_from_pretrained(self):
    """``from_pretrained`` should succeed for the first listed checkpoint."""
    for model_name in TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
        loaded = TFRobertaModel.from_pretrained(model_name)
        self.assertIsNotNone(loaded)