def __init__(self):
    super().__init__()
    # NOTE(review): `pretrained` is a free variable from the enclosing scope.
    # Embedding layer comes straight from the pretrained checkpoint.
    self.embeddings = TransformerEmbeddings.from_pretrained_module(pretrained)
    # Keep only the first 4 encoder layers of the pretrained stack.
    self.transformer = TransformerStack.from_pretrained_module(
        pretrained,
        num_hidden_layers=4,
    )
def __init__(
    self,
    vocab: Vocabulary,
    transformer_model: str = "roberta-large",
    override_weights_file: Optional[str] = None,
    **kwargs,
) -> None:
    """
    Build an embeddings -> encoder -> pooler pipeline from a pretrained
    transformer and attach a single-logit linear head on top of the pooler.
    """
    super().__init__(vocab, **kwargs)
    # All three pretrained pieces are loaded from the same checkpoint/weights.
    pretrained_kwargs = dict(
        model_name=transformer_model,
        weights_path=override_weights_file,
    )
    self.embeddings = TransformerEmbeddings.from_pretrained_module(**pretrained_kwargs)
    self.transformer_stack = TransformerStack.from_pretrained_module(**pretrained_kwargs)
    self.pooler = TransformerPooler.from_pretrained_module(**pretrained_kwargs)
    self.pooler_dropout = Dropout(p=0.1)

    self.linear_layer = torch.nn.Linear(self.pooler.get_output_dim(), 1)
    # Initialize the fresh head the same way huggingface initializes new heads.
    self.linear_layer.weight.data.normal_(mean=0.0, std=0.02)
    self.linear_layer.bias.data.zero_()

    self.loss = torch.nn.CrossEntropyLoss()

    from allennlp.training.metrics import CategoricalAccuracy

    self.accuracy = CategoricalAccuracy()
def from_huggingface_model_name(
    cls,
    vocab: Vocabulary,
    model_name: str,
    image_feature_dim: int,
    image_num_hidden_layers: int,
    image_hidden_size: int,
    image_num_attention_heads: int,
    combined_hidden_size: int,
    combined_num_attention_heads: int,
    pooled_output_dim: int,
    image_intermediate_size: int,
    image_attention_dropout: float,
    image_hidden_dropout: float,
    image_biattention_id: List[int],
    text_biattention_id: List[int],
    text_fixed_layer: int,
    image_fixed_layer: int,
    pooled_dropout: float = 0.1,
    fusion_method: str = "sum",
    *,
    ignore_text: bool = False,
    ignore_image: bool = False,
):
    """
    Alternate constructor: build the bi-modal model around the huggingface
    checkpoint named ``model_name``; the image stream is freshly initialized.
    """
    # The image features get their own (untrained) embedding projection.
    image_embeddings = ImageFeatureEmbeddings(
        feature_size=image_feature_dim,
        embedding_size=image_hidden_size,
        dropout=image_hidden_dropout,
    )
    # The text side reuses the pretrained embedding weights.
    text_embeddings = TransformerEmbeddings.from_pretrained_module(model_name)
    # The "...2" arguments configure the image stream of the two-stream
    # encoder; the text stream ("...1" ids/layers) comes from the checkpoint.
    encoder = BiModalEncoder.from_pretrained_module(
        model_name,
        num_hidden_layers2=image_num_hidden_layers,
        hidden_size2=image_hidden_size,
        num_attention_heads2=image_num_attention_heads,
        combined_hidden_size=combined_hidden_size,
        combined_num_attention_heads=combined_num_attention_heads,
        intermediate_size2=image_intermediate_size,
        attention_dropout2=image_attention_dropout,
        hidden_dropout2=image_hidden_dropout,
        biattention_id1=text_biattention_id,
        biattention_id2=image_biattention_id,
        fixed_layer1=text_fixed_layer,
        fixed_layer2=image_fixed_layer,
    )
    return cls(
        vocab=vocab,
        text_embeddings=text_embeddings,
        image_embeddings=image_embeddings,
        encoder=encoder,
        pooled_output_dim=pooled_output_dim,
        fusion_method=fusion_method,
        dropout=pooled_dropout,
        ignore_text=ignore_text,
        ignore_image=ignore_image,
    )
def __init__(self):
    super().__init__()
    # NOTE(review): `pretrained` is a free variable from the enclosing scope.
    # Load only the "bert.embeddings" submodule of the checkpoint.
    self.embeddings = TransformerEmbeddings.from_pretrained_module(
        pretrained,
        relevant_module="bert.embeddings",
    )
    # Take the first 4 layers of "bert.encoder"; strict=False because the
    # truncated stack does not consume every checkpoint parameter.
    self.transformer = TransformerStack.from_pretrained_module(
        pretrained,
        num_hidden_layers=4,
        relevant_module="bert.encoder",
        strict=False,
    )
def __init__(self):
    super().__init__()
    self.embeddings = TransformerEmbeddings.from_pretrained_module("bert-base-uncased")
    # Split the 12 pretrained layers into two stacks: layers 0-7 run
    # "separately", layers 8-11 run "combined".
    self.separate_transformer = TransformerStack.from_pretrained_module(
        "bert-base-uncased",
        num_hidden_layers=range(0, 8),
    )
    self.combined_transformer = TransformerStack.from_pretrained_module(
        "bert-base-uncased",
        num_hidden_layers=range(8, 12),
    )
def test_loading_from_pretrained_weights_using_model_name(self, pretrained_name):
    """Loading by model name must reproduce every huggingface parameter."""
    hf_embeddings = cached_transformers.get(pretrained_name, False).embeddings
    ours = TransformerEmbeddings.from_pretrained_module(pretrained_name)
    # The default mapping goes huggingface -> ours; invert it so we can look
    # up our parameter names from the huggingface side.
    name_mapping = {
        ours_name: hf_name
        for hf_name, ours_name in ours._construct_default_mapping(
            hf_embeddings, "huggingface", {}
        ).items()
    }
    missing = assert_equal_parameters(hf_embeddings, ours, mapping=name_mapping)
    assert len(missing) == 0
def test_loading_albert():
    """
    Albert is a special case: huggingface keeps the Linear layer that projects
    the (smaller) embedding size up to the encoder hidden size inside the
    *encoder*, while we fold that projection into our embedding module.
    """
    ours = TransformerEmbeddings.from_pretrained_module(
        "albert-base-v2",
    )
    theirs = AutoModel.from_pretrained("albert-base-v2")
    # The word-embedding weights must match the checkpoint exactly...
    assert_allclose(
        ours.embeddings.word_embeddings.weight.data,
        theirs.embeddings.word_embeddings.weight.data,
    )
    # ...and the encoder-side projection must have been copied onto our
    # embedding module's linear_transform.
    assert_allclose(
        ours.linear_transform.weight.data,
        theirs.encoder.embedding_hidden_mapping_in.weight.data,
    )
def __init__(self):
    super().__init__()
    self.embeddings = TransformerEmbeddings.from_pretrained_module(
        "bert-base-cased",
        relevant_module="bert.embeddings",
    )
    # The first 8 checkpoint layers form the "separate" stack.
    self.separate_transformer = TransformerStack.from_pretrained_module(
        "bert-base-cased",
        relevant_module="bert.encoder",
        num_hidden_layers=8,
        strict=False,
    )
    # Checkpoint layers 8-11 become layers 0-3 of the "combined" stack, so
    # remap their parameter names before loading.
    layer_name_mapping = {
        f"layer.{source_idx}": f"layers.{target_idx}"
        for target_idx, source_idx in enumerate(range(8, 12))
    }
    self.combined_transformer = TransformerStack.from_pretrained_module(
        "bert-base-cased",
        relevant_module="bert.encoder",
        num_hidden_layers=4,
        mapping=layer_name_mapping,
        strict=False,
    )
def test_forward_against_huggingface_output(self, module_name, hf_module):
    """Our embeddings' forward pass must match the huggingface module's."""
    input_ids = torch.tensor([[1, 2]])
    token_type_ids = torch.tensor([[1, 0]], dtype=torch.long)
    position_ids = torch.tensor([[0, 1]])

    torch.manual_seed(1234)
    embeddings = TransformerEmbeddings.from_pretrained_module(hf_module)
    torch.manual_seed(1234)
    # Eval mode disables dropout so the comparison is deterministic.
    embeddings = embeddings.eval()
    ours = embeddings.forward(
        input_ids=input_ids,
        token_type_ids=token_type_ids,
        position_ids=position_ids,
    )

    torch.manual_seed(1234)
    hf_module = hf_module.eval()  # same reason: deterministic output
    theirs = hf_module.forward(
        input_ids=input_ids,
        token_type_ids=token_type_ids,
        position_ids=position_ids,
    )
    assert torch.allclose(ours, theirs)
def test_end_to_end(self, model_name: str):
    """Embeddings + stack + pooler must reproduce the full huggingface model."""
    data = [
        ("I'm against picketing", "but I don't know how to show it."),
        ("I saw a human pyramid once.", "It was very unnecessary."),
    ]
    tokenizer = cached_transformers.get_tokenizer(model_name)
    batch = tokenizer.batch_encode_plus(data, padding=True, return_tensors="pt")

    with torch.no_grad():
        # Reference output from the unmodified huggingface model.
        reference_model = cached_transformers.get(model_name, make_copy=False).eval()
        reference_output = reference_model(**batch)

        embeddings = TransformerEmbeddings.from_pretrained_module(model_name).eval()
        transformer_stack = TransformerStack.from_pretrained_module(model_name).eval()
        pooler = TransformerPooler.from_pretrained_module(model_name).eval()

        # Our stack expects a boolean attention mask.
        batch["attention_mask"] = batch["attention_mask"].to(torch.bool)
        hidden = embeddings(**batch)
        stack_output = transformer_stack(hidden, batch["attention_mask"])
        assert_allclose(
            stack_output.final_hidden_states,
            reference_output.last_hidden_state,
            rtol=0.0001,
            atol=1e-4,
        )

        pooled = pooler(stack_output.final_hidden_states)
        assert_allclose(pooled, reference_output.pooler_output, rtol=0.0001, atol=1e-4)
def __init__(
    self,
    vocab: Vocabulary,
    transformer_model: str = "roberta-large",
    num_labels: Optional[int] = None,
    label_namespace: str = "labels",
    override_weights_file: Optional[str] = None,
    **kwargs,
) -> None:
    """
    Build an embeddings -> encoder -> pooler pipeline from a pretrained
    transformer with a linear classification head sized to the label
    vocabulary (or to an explicit ``num_labels``).
    """
    super().__init__(vocab, **kwargs)
    # All three pretrained pieces are loaded from the same checkpoint/weights.
    pretrained_kwargs = dict(
        model_name=transformer_model,
        weights_path=override_weights_file,
    )
    self.embeddings = TransformerEmbeddings.from_pretrained_module(**pretrained_kwargs)
    self.transformer_stack = TransformerStack.from_pretrained_module(**pretrained_kwargs)
    self.pooler = TransformerPooler.from_pretrained_module(**pretrained_kwargs)
    self.pooler_dropout = Dropout(p=0.1)

    self.label_tokens = vocab.get_index_to_token_vocabulary(label_namespace)
    if num_labels is None:
        # Default: one output per label in the vocabulary namespace.
        num_labels = len(self.label_tokens)

    self.linear_layer = torch.nn.Linear(self.pooler.get_output_dim(), num_labels)
    # Initialize the fresh head the same way huggingface initializes new heads.
    self.linear_layer.weight.data.normal_(mean=0.0, std=0.02)
    self.linear_layer.bias.data.zero_()

    from allennlp.training.metrics import CategoricalAccuracy, FBetaMeasure

    self.loss = torch.nn.CrossEntropyLoss()
    self.acc = CategoricalAccuracy()
    self.f1 = FBetaMeasure()
def test_loading_from_pretrained_module(pretrained_name):
    """Smoke test: constructing from a pretrained module must not raise."""
    TransformerEmbeddings.from_pretrained_module(pretrained_name)
def from_huggingface_model_name(
    cls,
    vocab: Vocabulary,
    model_name: str,
    image_feature_dim: int,
    image_num_hidden_layers: int,
    image_hidden_size: int,
    image_num_attention_heads: int,
    combined_hidden_size: int,
    combined_num_attention_heads: int,
    pooled_output_dim: int,
    image_intermediate_size: int,
    image_attention_dropout: float,
    image_hidden_dropout: float,
    image_biattention_id: List[int],
    text_biattention_id: List[int],
    text_fixed_layer: int,
    image_fixed_layer: int,
    pooled_dropout: float = 0.1,
    fusion_method: str = "sum",
    *,
    ignore_text: bool = False,
    ignore_image: bool = False,
):
    """
    Alternate constructor: build the bi-modal model around the huggingface
    checkpoint named ``model_name``.  The text stream (embeddings and the
    "...1" half of the encoder) is initialized from the checkpoint; the image
    stream ("...2" parameters) is freshly initialized.
    """
    transformer = AutoModel.from_pretrained(model_name)

    # Albert (and maybe others?) has this "embedding_size", that's different from
    # "hidden_size". To get them to the same dimensionality, it uses a linear
    # transform after the embedding layer, which we need to pull out and copy here.
    if hasattr(transformer.config, "embedding_size"):
        config = transformer.config
        # BUGFIX: the encoder width on huggingface configs is ``hidden_size``;
        # ``hidden_dim`` does not exist on AlbertConfig and raised
        # AttributeError for exactly the models this branch targets.
        text_embeddings = TransformerEmbeddings.from_pretrained_module(
            transformer.embeddings, output_size=config.hidden_size
        )

        from transformers.models.albert.modeling_albert import AlbertModel

        if isinstance(transformer, AlbertModel):
            # Copy Albert's embedding->hidden projection onto our embedding module.
            text_embeddings.linear_transform = deepcopy(
                transformer.encoder.embedding_hidden_mapping_in
            )
        else:
            logger.warning(
                "Unknown model that uses separate embedding size; weights of the linear "
                f"transform will not be initialized. \nModel type is: {transformer.__class__}"
            )
    else:
        text_embeddings = TransformerEmbeddings.from_pretrained_module(transformer.embeddings)

    # The image features get their own (untrained) embedding projection.
    image_embeddings = ImageFeatureEmbeddings(
        feature_size=image_feature_dim,
        embedding_size=image_hidden_size,
        dropout=image_hidden_dropout,
    )
    encoder = BiModalEncoder.from_pretrained_module(
        pretrained_module=transformer,
        num_hidden_layers2=image_num_hidden_layers,
        hidden_size2=image_hidden_size,
        num_attention_heads2=image_num_attention_heads,
        combined_hidden_size=combined_hidden_size,
        combined_num_attention_heads=combined_num_attention_heads,
        intermediate_size2=image_intermediate_size,
        attention_dropout2=image_attention_dropout,
        hidden_dropout2=image_hidden_dropout,
        biattention_id1=text_biattention_id,
        biattention_id2=image_biattention_id,
        fixed_layer1=text_fixed_layer,
        fixed_layer2=image_fixed_layer,
    )
    return cls(
        vocab=vocab,
        text_embeddings=text_embeddings,
        image_embeddings=image_embeddings,
        encoder=encoder,
        pooled_output_dim=pooled_output_dim,
        fusion_method=fusion_method,
        dropout=pooled_dropout,
        ignore_text=ignore_text,
        ignore_image=ignore_image,
    )