Example #1
    def test_sanity(self):
        class TextEmbeddings(TransformerModule, FromParams):
            def __init__(
                self,
                vocab_size: int,
                hidden_size: int,
                pad_token_id: int,
                max_position_embeddings: int,
                type_vocab_size: int,
                dropout: float,
            ):
                super().__init__()
                self.word_embeddings = torch.nn.Embedding(
                    vocab_size, hidden_size, padding_idx=pad_token_id
                )
                self.position_embeddings = torch.nn.Embedding(max_position_embeddings, hidden_size)
                self.token_type_embeddings = torch.nn.Embedding(type_vocab_size, hidden_size)

                self.layer_norm = torch.nn.LayerNorm(hidden_size, eps=1e-12)
                self.dropout = torch.nn.Dropout(dropout)

            def forward(
                self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None
            ):
                if input_ids is not None:
                    input_shape = input_ids.size()
                else:
                    input_shape = inputs_embeds.size()[:-1]

                seq_length = input_shape[1]
                device = input_ids.device if input_ids is not None else inputs_embeds.device
                if position_ids is None:
                    position_ids = torch.arange(seq_length, dtype=torch.long, device=device)
                    position_ids = position_ids.unsqueeze(0).expand(input_shape)
                if token_type_ids is None:
                    token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

                if inputs_embeds is None:
                    inputs_embeds = self.word_embeddings(input_ids)
                position_embeddings = self.position_embeddings(position_ids)
                token_type_embeddings = self.token_type_embeddings(token_type_ids)

                embeddings = inputs_embeds + position_embeddings + token_type_embeddings
                embeddings = self.layer_norm(embeddings)
                embeddings = self.dropout(embeddings)
                return embeddings

        torch.manual_seed(23)
        text = TextEmbeddings(10, 5, 2, 3, 7, 0.0)
        torch.manual_seed(23)
        transformer = TransformerEmbeddings(10, 5, 2, 3, 7, 0.0)

        input_ids = torch.tensor([[1, 2]])
        token_type_ids = torch.tensor([[1, 0]], dtype=torch.long)
        position_ids = torch.tensor([[0, 1]])

        text_output = text.forward(input_ids, token_type_ids, position_ids)
        transformer_output = transformer.forward(input_ids, token_type_ids, position_ids)

        assert_allclose(text_output, transformer_output)
Example #2
            def __init__(self):
                super().__init__()
                self.embeddings = TransformerEmbeddings.from_pretrained_module(
                    pretrained)

                self.transformer = TransformerStack.from_pretrained_module(
                    pretrained, num_hidden_layers=4)
Example #3
    def test_no_token_type_layer(self):
        params = copy.deepcopy(self.params_dict)
        params["type_vocab_size"] = 0
        params = Params(params)
        module = TransformerEmbeddings.from_params(params)

        assert len(module.embeddings) == 2
Example #4
    def __init__(
        self,
        vocab: Vocabulary,
        transformer_model: str = "roberta-large",
        override_weights_file: Optional[str] = None,
        **kwargs
    ) -> None:
        super().__init__(vocab, **kwargs)
        transformer_kwargs = {
            "model_name": transformer_model,
            "weights_path": override_weights_file,
        }
        self.embeddings = TransformerEmbeddings.from_pretrained_module(**transformer_kwargs)
        self.transformer_stack = TransformerStack.from_pretrained_module(**transformer_kwargs)
        self.pooler = TransformerPooler.from_pretrained_module(**transformer_kwargs)
        self.pooler_dropout = Dropout(p=0.1)

        self.linear_layer = torch.nn.Linear(self.pooler.get_output_dim(), 1)
        self.linear_layer.weight.data.normal_(mean=0.0, std=0.02)
        self.linear_layer.bias.data.zero_()

        self.loss = torch.nn.CrossEntropyLoss()

        from allennlp.training.metrics import CategoricalAccuracy

        self.accuracy = CategoricalAccuracy()
Example #5
    def from_huggingface_model_name(
        cls,
        vocab: Vocabulary,
        model_name: str,
        image_feature_dim: int,
        image_num_hidden_layers: int,
        image_hidden_size: int,
        image_num_attention_heads: int,
        combined_hidden_size: int,
        combined_num_attention_heads: int,
        pooled_output_dim: int,
        image_intermediate_size: int,
        image_attention_dropout: float,
        image_hidden_dropout: float,
        image_biattention_id: List[int],
        text_biattention_id: List[int],
        text_fixed_layer: int,
        image_fixed_layer: int,
        pooled_dropout: float = 0.1,
        fusion_method: str = "sum",
        *,
        ignore_text: bool = False,
        ignore_image: bool = False,
    ):
        text_embeddings = TransformerEmbeddings.from_pretrained_module(
            model_name)

        image_embeddings = ImageFeatureEmbeddings(
            feature_size=image_feature_dim,
            embedding_size=image_hidden_size,
            dropout=image_hidden_dropout,
        )

        encoder = BiModalEncoder.from_pretrained_module(
            model_name,
            num_hidden_layers2=image_num_hidden_layers,
            hidden_size2=image_hidden_size,
            num_attention_heads2=image_num_attention_heads,
            combined_hidden_size=combined_hidden_size,
            combined_num_attention_heads=combined_num_attention_heads,
            intermediate_size2=image_intermediate_size,
            attention_dropout2=image_attention_dropout,
            hidden_dropout2=image_hidden_dropout,
            biattention_id1=text_biattention_id,
            biattention_id2=image_biattention_id,
            fixed_layer1=text_fixed_layer,
            fixed_layer2=image_fixed_layer,
        )
        return cls(
            vocab=vocab,
            text_embeddings=text_embeddings,
            image_embeddings=image_embeddings,
            encoder=encoder,
            pooled_output_dim=pooled_output_dim,
            fusion_method=fusion_method,
            dropout=pooled_dropout,
            ignore_text=ignore_text,
            ignore_image=ignore_image,
        )
Example #6
 def __init__(self):
     super().__init__()
     self.embeddings = TransformerEmbeddings.get_relevant_module(
         "albert-base-v2")
     self.transformer = TransformerStack.from_pretrained_module(
         "bert-base-uncased")
     # We want to tune only the embeddings, because that's our experiment.
     self.transformer.requires_grad = False
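In plain PyTorch, assigning requires_grad on a module object only sets a Python attribute; freezing happens per parameter. If the goal of the snippet above is to keep the encoder fixed while tuning the embeddings, a minimal standalone sketch (reusing the same model name as above, not taken from the original) is:

from allennlp.modules.transformer import TransformerStack

# Explicitly freeze every parameter of the pretrained stack.
transformer = TransformerStack.from_pretrained_module("bert-base-uncased")
transformer.requires_grad_(False)  # nn.Module.requires_grad_ recurses into all parameters
assert all(not p.requires_grad for p in transformer.parameters())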
Example #7
    def setup_method(self):
        super().setup_method()

        self.params_dict = {key: val for key, val in PARAMS_DICT.items()}

        params = Params(copy.deepcopy(self.params_dict))

        self.transformer_embeddings = TransformerEmbeddings.from_params(params)
Example #8
 def __init__(self):
     super().__init__()
     self.embeddings = TransformerEmbeddings.from_pretrained_module(
         pretrained, relevant_module="bert.embeddings")
     self.transformer = TransformerStack.from_pretrained_module(
         pretrained,
         num_hidden_layers=4,
         relevant_module="bert.encoder",
         strict=False,
     )
Example #9
 def __init__(self):
     super().__init__()
     self.embeddings = TransformerEmbeddings.from_pretrained_module(
         "bert-base-uncased")
     self.separate_transformer = TransformerStack.from_pretrained_module(
         "bert-base-uncased", num_hidden_layers=range(0, 8))
     self.combined_transformer = TransformerStack.from_pretrained_module(
         "bert-base-uncased",
         num_hidden_layers=range(8, 12),
     )
Example #10
 def test_loading_from_pretrained_weights_using_model_name(self, pretrained_name):
     pretrained_module = cached_transformers.get(pretrained_name, False).embeddings
     module = TransformerEmbeddings.from_pretrained_module(pretrained_name)
     mapping = {
         val: key
         for key, val in module._construct_default_mapping(
             pretrained_module, "huggingface", {}
         ).items()
     }
     missing = assert_equal_parameters(pretrained_module, module, mapping=mapping)
     assert len(missing) == 0
Example #11
def test_output_size(params):
    input_ids = torch.tensor([[1, 2]])
    token_type_ids = torch.tensor([[1, 0]], dtype=torch.long)
    position_ids = torch.tensor([[0, 1]])
    params["output_size"] = 7
    module = TransformerEmbeddings.from_params(params)
    output = module(input_ids=input_ids,
                    token_type_ids=token_type_ids,
                    position_ids=position_ids)

    assert output.shape[-1] == 7
Example #12
    def test_output_size(self):
        input_ids = torch.tensor([[1, 2]])
        token_type_ids = torch.tensor([[1, 0]], dtype=torch.long)
        position_ids = torch.tensor([[0, 1]])
        params = copy.deepcopy(self.params_dict)
        params["output_size"] = 7
        params = Params(params)
        module = TransformerEmbeddings.from_params(params)
        output = module.forward(
            input_ids=input_ids, token_type_ids=token_type_ids, position_ids=position_ids
        )

        assert output.shape[-1] == 7
Example #13
def test_loading_albert():
    """
    Albert is a special case because it includes a Linear layer in the encoder
    that maps the embeddings to the encoder hidden size, but we include this linear
    layer within our embedding layer.
    """
    transformer_embedding = TransformerEmbeddings.from_pretrained_module("albert-base-v2")
    albert = AutoModel.from_pretrained("albert-base-v2")
    assert_allclose(
        transformer_embedding.embeddings.word_embeddings.weight.data,
        albert.embeddings.word_embeddings.weight.data,
    )
    assert_allclose(
        transformer_embedding.linear_transform.weight.data,
        albert.encoder.embedding_hidden_mapping_in.weight.data,
    )
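A quick shape check makes the point of the docstring concrete (a minimal sketch, not part of the original test; it reuses the same modules and relies on Albert's published config values, embedding_size=128 and hidden_size=768):

from transformers import AutoModel
from allennlp.modules.transformer import TransformerEmbeddings

embeddings = TransformerEmbeddings.from_pretrained_module("albert-base-v2")
albert = AutoModel.from_pretrained("albert-base-v2")

# The token-embedding table keeps Albert's factorized embedding size (128)...
assert embeddings.embeddings.word_embeddings.weight.shape[-1] == albert.config.embedding_size
# ...and the pulled-in linear transform maps it up to the encoder hidden size (768).
assert embeddings.linear_transform.weight.shape == (
    albert.config.hidden_size,
    albert.config.embedding_size,
)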
Example #14
 def __init__(self):
     super().__init__()
     self.embeddings = TransformerEmbeddings.from_pretrained_module(
         "bert-base-cased", relevant_module="bert.embeddings")
     self.separate_transformer = TransformerStack.from_pretrained_module(
         "bert-base-cased",
         relevant_module="bert.encoder",
         num_hidden_layers=8,
         strict=False,
     )
     self.combined_transformer = TransformerStack.from_pretrained_module(
         "bert-base-cased",
         relevant_module="bert.encoder",
         num_hidden_layers=4,
         mapping={
             f"layer.{l}": f"layers.{i}"
             for (i, l) in enumerate(range(8, 12))
         },
         strict=False,
     )
Example #15
    def test_forward_against_huggingface_output(self, module_name, hf_module):
        input_ids = torch.tensor([[1, 2]])
        token_type_ids = torch.tensor([[1, 0]], dtype=torch.long)
        position_ids = torch.tensor([[0, 1]])

        torch.manual_seed(1234)
        embeddings = TransformerEmbeddings.from_pretrained_module(hf_module)

        torch.manual_seed(1234)
        embeddings = embeddings.eval()  # setting to eval mode to avoid non-deterministic dropout.
        output = embeddings.forward(
            input_ids=input_ids, token_type_ids=token_type_ids, position_ids=position_ids
        )

        torch.manual_seed(1234)
        hf_module = hf_module.eval()  # setting to eval mode to avoid non-deterministic dropout.
        hf_output = hf_module.forward(
            input_ids=input_ids, token_type_ids=token_type_ids, position_ids=position_ids
        )

        assert torch.allclose(output, hf_output)
Example #16
    def test_end_to_end(self, model_name: str):
        data = [
            ("I'm against picketing", "but I don't know how to show it."),
            ("I saw a human pyramid once.", "It was very unnecessary."),
        ]
        tokenizer = cached_transformers.get_tokenizer(model_name)
        batch = tokenizer.batch_encode_plus(data,
                                            padding=True,
                                            return_tensors="pt")

        with torch.no_grad():
            huggingface_model = cached_transformers.get(
                model_name, make_copy=False).eval()
            huggingface_output = huggingface_model(**batch)

            embeddings = TransformerEmbeddings.from_pretrained_module(
                model_name).eval()
            transformer_stack = TransformerStack.from_pretrained_module(
                model_name).eval()
            pooler = TransformerPooler.from_pretrained_module(
                model_name).eval()
            batch["attention_mask"] = batch["attention_mask"].to(torch.bool)
            output = embeddings(**batch)
            output = transformer_stack(output, batch["attention_mask"])

            assert_allclose(
                output.final_hidden_states,
                huggingface_output.last_hidden_state,
                rtol=0.0001,
                atol=1e-4,
            )

            output = pooler(output.final_hidden_states)
            assert_allclose(output,
                            huggingface_output.pooler_output,
                            rtol=0.0001,
                            atol=1e-4)
Example #17
    def __init__(
        self,
        vocab: Vocabulary,
        transformer_model: str = "roberta-large",
        num_labels: Optional[int] = None,
        label_namespace: str = "labels",
        override_weights_file: Optional[str] = None,
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)
        transformer_kwargs = {
            "model_name": transformer_model,
            "weights_path": override_weights_file,
        }
        self.embeddings = TransformerEmbeddings.from_pretrained_module(
            **transformer_kwargs)
        self.transformer_stack = TransformerStack.from_pretrained_module(
            **transformer_kwargs)
        self.pooler = TransformerPooler.from_pretrained_module(
            **transformer_kwargs)
        self.pooler_dropout = Dropout(p=0.1)

        self.label_tokens = vocab.get_index_to_token_vocabulary(
            label_namespace)
        if num_labels is None:
            num_labels = len(self.label_tokens)
        self.linear_layer = torch.nn.Linear(self.pooler.get_output_dim(),
                                            num_labels)
        self.linear_layer.weight.data.normal_(mean=0.0, std=0.02)
        self.linear_layer.bias.data.zero_()

        from allennlp.training.metrics import CategoricalAccuracy, FBetaMeasure

        self.loss = torch.nn.CrossEntropyLoss()
        self.acc = CategoricalAccuracy()
        self.f1 = FBetaMeasure()
Example #18
def transformer_embeddings(params):
    return TransformerEmbeddings.from_params(params.duplicate())
Example #19
def test_loading_from_pretrained_module(pretrained_name):
    TransformerEmbeddings.from_pretrained_module(pretrained_name)
Example #20
def test_no_token_type_layer(params):
    params["type_vocab_size"] = 0
    module = TransformerEmbeddings.from_params(params)
    assert len(module.embeddings) == 2
Example #21
    def from_huggingface_model_name(
        cls,
        vocab: Vocabulary,
        model_name: str,
        image_feature_dim: int,
        image_num_hidden_layers: int,
        image_hidden_size: int,
        image_num_attention_heads: int,
        combined_hidden_size: int,
        combined_num_attention_heads: int,
        pooled_output_dim: int,
        image_intermediate_size: int,
        image_attention_dropout: float,
        image_hidden_dropout: float,
        image_biattention_id: List[int],
        text_biattention_id: List[int],
        text_fixed_layer: int,
        image_fixed_layer: int,
        pooled_dropout: float = 0.1,
        fusion_method: str = "sum",
        *,
        ignore_text: bool = False,
        ignore_image: bool = False,
    ):
        transformer = AutoModel.from_pretrained(model_name)

        # Albert (and maybe others?) has this "embedding_size", that's different from "hidden_size".
        # To get them to the same dimensionality, it uses a linear transform after the embedding
        # layer, which we need to pull out and copy here.
        if hasattr(transformer.config, "embedding_size"):
            config = transformer.config

            text_embeddings = TransformerEmbeddings.from_pretrained_module(
                transformer.embeddings, output_size=config.hidden_size
            )

            from transformers.models.albert.modeling_albert import AlbertModel

            if isinstance(transformer, AlbertModel):
                text_embeddings.linear_transform = deepcopy(
                    transformer.encoder.embedding_hidden_mapping_in
                )
            else:
                logger.warning(
                    "Unknown model that uses separate embedding size; weights of the linear "
                    f"transform will not be initialized.  Model type is: {transformer.__class__}"
                )
        else:
            text_embeddings = TransformerEmbeddings.from_pretrained_module(transformer.embeddings)

        image_embeddings = ImageFeatureEmbeddings(
            feature_size=image_feature_dim,
            embedding_size=image_hidden_size,
            dropout=image_hidden_dropout,
        )

        encoder = BiModalEncoder.from_pretrained_module(
            pretrained_module=transformer,
            num_hidden_layers2=image_num_hidden_layers,
            hidden_size2=image_hidden_size,
            num_attention_heads2=image_num_attention_heads,
            combined_hidden_size=combined_hidden_size,
            combined_num_attention_heads=combined_num_attention_heads,
            intermediate_size2=image_intermediate_size,
            attention_dropout2=image_attention_dropout,
            hidden_dropout2=image_hidden_dropout,
            biattention_id1=text_biattention_id,
            biattention_id2=image_biattention_id,
            fixed_layer1=text_fixed_layer,
            fixed_layer2=image_fixed_layer,
        )
        return cls(
            vocab=vocab,
            text_embeddings=text_embeddings,
            image_embeddings=image_embeddings,
            encoder=encoder,
            pooled_output_dim=pooled_output_dim,
            fusion_method=fusion_method,
            dropout=pooled_dropout,
            ignore_text=ignore_text,
            ignore_image=ignore_image,
        )
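The embedding_size branch above can be checked directly against the Hugging Face config (a small sketch assuming only the transformers library; Albert's defaults are embedding_size=128 and hidden_size=768, which is exactly the mismatch the copied linear transform bridges):

from transformers import AutoConfig

# Albert factorizes its embeddings: narrow token embeddings, wider encoder states.
config = AutoConfig.from_pretrained("albert-base-v2")
print(config.embedding_size, config.hidden_size)  # 128 768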