Example #1
    def __init__(self, config: Config, output_encoded_layers: bool,
                 **kwarg) -> None:
        super().__init__(config, output_encoded_layers=output_encoded_layers)
        # assert config.pretrained_encoder.load_path, "Load path cannot be empty."
        self.encoder = SentenceEncoder(transformer=Transformer(
            vocab_size=config.vocab_size,
            embedding_dim=config.embedding_dim,
            layers=[
                TransformerLayer(
                    embedding_dim=config.embedding_dim,
                    attention=MultiheadSelfAttention(
                        config.embedding_dim, config.num_attention_heads),
                ) for _ in range(config.num_encoder_layers)
            ],
        ))
        self.apply(init_params)
        if config.model_path:
            with PathManager.open(config.model_path, "rb") as f:
                roberta_state = torch.load(f,
                                           map_location=lambda s, l:
                                           default_restore_location(s, "cpu"))
            # If the model has previously been loaded in PyText and fine-tuned,
            # we don't need the special state dict translation; load it
            # directly.
            if not config.is_finetuned:
                self.encoder.load_roberta_state_dict(roberta_state["model"])
            else:
                self.load_state_dict(roberta_state)

        self.representation_dim = self._embedding().weight.size(-1)
        log_class_usage(__class__)
Example #2
    def __init__(
        self,
        vocab_size: int,
        embedding_dim: int,
        num_attention_heads: int,
        num_encoder_layers: int,
        output_dropout: float,
        model_path: Optional[str] = None,
    ):
        super().__init__()
        self.transformer = Transformer(
            vocab_size=vocab_size,
            embedding_dim=embedding_dim,
            layers=[
                TransformerLayer(
                    embedding_dim=embedding_dim,
                    attention=MultiheadSelfAttention(
                        embedding_dim, num_attention_heads
                    ),
                )
                for _ in range(num_encoder_layers)
            ],
        )
        self.output_dropout = nn.Dropout(output_dropout)

        self.apply(init_params)
        if model_path:
            with PathManager.open(model_path, "rb") as f:
                roberta_state = torch.load(
                    f, map_location=lambda s, l: default_restore_location(s, "cpu")
                )
                if "model" in roberta_state:
                    roberta_state = translate_roberta_state_dict(roberta_state["model"])
                self.load_state_dict(roberta_state)
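
Example #2 takes plain hyperparameters instead of a Config object, so it can be instantiated directly. A minimal sketch, assuming the constructor above belongs to a module class named RobertaEncoder (the class name is hypothetical) and using RoBERTa-base-sized values:

    # Hypothetical class name; the keyword arguments follow the signature in
    # Example #2, with the usual RoBERTa-base hyperparameters.
    encoder = RobertaEncoder(
        vocab_size=50265,
        embedding_dim=768,
        num_attention_heads=12,
        num_encoder_layers=12,
        output_dropout=0.1,
        model_path=None,  # skip loading pretrained weights
    )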
Example #3
    def testLoweringTransformerToTracedNVFastTransformer(self):
        V = 1000
        transformer = Transformer(vocab_size=V).cuda().eval().half()
        faster_transformer = NVFasterTransformerEncoder(transformer)
        faster_transformer_jit = None

        for _ in range(10):
            B = np.random.randint(low=0, high=64)
            max_T = np.random.randint(low=0, high=64)
            lengths = np.random.randint(low=0, high=max_T + 1, size=(B, ))
            tokens = torch.zeros(B, max_T).cuda().long()
            for b in range(B):
                length = lengths[b]
                tokens[b, :length] = (torch.randint(
                    transformer.padding_idx + 1, V - 1,
                    size=(1, length)).cuda().long())
                tokens[b, length:] = transformer.padding_idx
            if not faster_transformer_jit:
                faster_transformer_jit = torch.jit.trace(
                    faster_transformer, (tokens, ))

            ref = transformer(tokens)
            fast = faster_transformer_jit(tokens)
            for rref, ffast in zip(ref, fast):
                for b in range(B):
                    length = lengths[b]
                    torch.testing.assert_allclose(rref[:length, b],
                                                  ffast[:length, b],
                                                  atol=2e-2,
                                                  rtol=2e-2)
Example #4
    def testLoweringBaseTransformerToNVFastTransformerPadded(self):
        V = 1000
        transformer = Transformer(vocab_size=V).cuda().eval().half()
        faster_transformer = NVFasterTransformerEncoder(transformer)

        for B in range(1, 32):
            for max_T in [0, 1, 2, 6, 40, 127]:
                lengths = np.random.randint(low=0, high=max_T + 1, size=(B, ))
                tokens = torch.zeros(B, max_T).cuda().long()
                for b in range(B):
                    length = lengths[b]
                    tokens[b, :length] = (torch.randint(
                        transformer.padding_idx + 1, V - 1,
                        size=(1, length)).cuda().long())
                    tokens[b, length:] = transformer.padding_idx

                ref = transformer(tokens)
                fast = faster_transformer(tokens)
                for rref, ffast in zip(ref, fast):
                    for b in range(B):
                        length = lengths[b]
                        torch.testing.assert_allclose(rref[:length, b],
                                                      ffast[:length, b],
                                                      atol=2e-2,
                                                      rtol=2e-2)
Example #5
    def __init__(self, config: Config, output_encoded_layers: bool,
                 **kwarg) -> None:
        super().__init__(config, output_encoded_layers=output_encoded_layers)

        # map to the real model_path
        config.model_path = (
            resources.roberta.RESOURCE_MAP[config.model_path]
            if config.model_path in resources.roberta.RESOURCE_MAP
            else config.model_path
        )
        # assert config.pretrained_encoder.load_path, "Load path cannot be empty."

        # Create the compression layer when using linear multihead attention;
        # the projection is shared across all layers.
        if config.use_linformer_encoder:
            compress_layer = nn.Linear(
                config.max_seq_len - 2,
                (config.max_seq_len - 2) // config.linformer_compressed_ratio,
            )

        self.encoder = SentenceEncoder(transformer=Transformer(
            vocab_size=config.vocab_size,
            embedding_dim=config.embedding_dim,
            layers=[
                TransformerLayer(
                    embedding_dim=config.embedding_dim,
                    attention=MultiheadLinearAttention(
                        embed_dim=config.embedding_dim,
                        num_heads=config.num_attention_heads,
                        compress_layer=compress_layer,
                    )
                    if config.use_linformer_encoder
                    else MultiheadSelfAttention(
                        embed_dim=config.embedding_dim,
                        num_heads=config.num_attention_heads,
                    ),
                ) for _ in range(config.num_encoder_layers)
            ],
            max_seq_len=config.max_seq_len,
        ))
        self.apply(init_params)
        if config.model_path:
            with PathManager.open(config.model_path, "rb") as f:
                roberta_state = torch.load(f,
                                           map_location=lambda s, l:
                                           default_restore_location(s, "cpu"))
            # If the model has previously been loaded in PyText and fine-tuned,
            # we don't need the special state dict translation; load it
            # directly.
            if not config.is_finetuned:
                self.encoder.load_roberta_state_dict(roberta_state["model"])
            else:
                self.load_state_dict(roberta_state)

        self.representation_dim = self._embedding().weight.size(-1)
        self.export_encoder = config.export_encoder
        self.variable_size_embedding = config.variable_size_embedding
        log_class_usage(__class__)
Example #6
    def _small_encoder(self):
        layers = [
            TransformerLayer(
                embedding_dim=12,
                attention=MultiheadSelfAttention(
                    embed_dim=12, num_heads=12, scaling=0.125
                ),
            )
            for _ in range(2)
        ]
        transformer = Transformer(vocab_size=100, embedding_dim=12, layers=layers)
        return SentenceEncoder(transformer)
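
Examples #3, #4 and #7 call the wrapped Transformer directly on a (batch, time) tensor of token IDs and index the returned per-layer states as (time, batch, channel). A short usage sketch along those lines, reusing the small configuration from Example #6; the import path and the output layout are assumptions based on how the tests index the results:

    import torch
    # Import path assumed for PyText; adjust if your version differs.
    from pytext.models.representations.transformer import (
        MultiheadSelfAttention, Transformer, TransformerLayer)

    layers = [
        TransformerLayer(
            embedding_dim=12,
            attention=MultiheadSelfAttention(embed_dim=12, num_heads=12, scaling=0.125),
        )
        for _ in range(2)
    ]
    transformer = Transformer(vocab_size=100, embedding_dim=12, layers=layers).eval()

    # Two sequences of length 8, drawn above the padding index as in the tests.
    tokens = torch.randint(transformer.padding_idx + 1, 100, size=(2, 8))
    states = transformer(tokens)
    print(states[-1].shape)  # expected (T, B, C), i.e. torch.Size([8, 2, 12])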
Example #7
    def testLoweringBaseTransformerToNVFastTransformer(self):
        V = 1000
        transformer = Transformer(vocab_size=V).cuda().eval().half()
        faster_transformer = NVFasterTransformerEncoder(transformer)

        for B in range(1, 32):
            for T in [0, 1, 7, 8, 16]:
                tokens = (torch.randint(transformer.padding_idx + 1,
                                        V - 1,
                                        size=(B, T)).cuda().long())
                ref = transformer(tokens)
                fast = faster_transformer(tokens)
                for rref, ffast in zip(ref, fast):
                    torch.testing.assert_allclose(rref,
                                                  ffast,
                                                  atol=2e-2,
                                                  rtol=2e-2)
Example #8
    def __init__(self, config: Config, output_encoded_layers: bool,
                 **kwarg) -> None:
        super().__init__(config, output_encoded_layers=output_encoded_layers)
        # assert config.pretrained_encoder.load_path, "Load path cannot be empty."
        self.encoder = SentenceEncoder(transformer=Transformer(
            embedding_dim=config.embedding_dim,
            layers=[
                TransformerLayer(
                    embedding_dim=config.embedding_dim,
                    attention=MultiheadSelfAttention(
                        config.embedding_dim, config.num_attention_heads),
                ) for _ in range(config.num_encoder_layers)
            ],
        ))
        roberta_state = torch.load(
            config.model_path,
            map_location=lambda s, l: default_restore_location(s, "cpu"),
        )
        self.encoder.load_roberta_state_dict(roberta_state["model"])
        self.representation_dim = (
            self.encoder.transformer.token_embedding.weight.size(-1))
Example #9
    def testLoweringLargeTransformerToNVFastTransformer(self):
        V = 1000
        L = 24
        D = 1024
        H = 16
        layers = [
            TransformerLayer(
                embedding_dim=D,
                attention=MultiheadSelfAttention(embed_dim=D, num_heads=H),
            ) for _ in range(L)
        ]

        transformer = (Transformer(vocab_size=V,
                                   embedding_dim=D,
                                   layers=layers).cuda().eval().half())
        faster_transformer = NVFasterTransformerEncoder(transformer)
        for _ in range(10):
            B = np.random.randint(low=0, high=32)
            max_T = np.random.randint(low=0, high=32)
            lengths = np.random.randint(low=0, high=max_T + 1, size=(B, ))
            tokens = torch.zeros(B, max_T).cuda().long()
            for b in range(B):
                length = lengths[b]
                tokens[b, :length] = (torch.randint(
                    transformer.padding_idx + 1, V - 1,
                    size=(1, length)).cuda().long())
                tokens[b, length:] = transformer.padding_idx

            ref = transformer(tokens)
            fast = faster_transformer(tokens)
            for rref, ffast in zip(ref, fast):
                for b in range(B):
                    length = lengths[b]
                    torch.testing.assert_allclose(rref[:length, b],
                                                  ffast[:length, b],
                                                  atol=3e-2,
                                                  rtol=2e-2)
Example #10
    def __init__(self, config: Config, output_encoded_layers: bool,
                 **kwarg) -> None:
        super().__init__(config, output_encoded_layers=output_encoded_layers)

        # map to the real model_path
        config.model_path = (
            resources.roberta.RESOURCE_MAP[config.model_path]
            if config.model_path in resources.roberta.RESOURCE_MAP
            else config.model_path
        )
        # assert config.pretrained_encoder.load_path, "Load path cannot be empty."

        # Create the compression layer when using linear multihead attention;
        # the projection is shared across all layers.
        if config.use_linformer_encoder:
            compress_layer = nn.Linear(
                config.max_seq_len - 2,
                (config.max_seq_len - 2) // config.linformer_compressed_ratio,
            )

        self.use_selfie_encoder = config.use_selfie_encoder

        if config.use_linformer_encoder:
            if config.linformer_quantize:
                layers = [
                    TransformerLayer(
                        embedding_dim=config.embedding_dim,
                        attention=QuantizedMultiheadLinearAttention(
                            embed_dim=config.embedding_dim,
                            num_heads=config.num_attention_heads,
                            compress_layer=compress_layer,
                        ),
                    ) for _ in range(config.num_encoder_layers)
                ]
            else:
                layers = [
                    TransformerLayer(
                        embedding_dim=config.embedding_dim,
                        attention=MultiheadLinearAttention(
                            embed_dim=config.embedding_dim,
                            num_heads=config.num_attention_heads,
                            compress_layer=compress_layer,
                        ),
                    ) for _ in range(config.num_encoder_layers)
                ]
        else:
            layers = [
                TransformerLayer(
                    embedding_dim=config.embedding_dim,
                    attention=MultiheadSelfAttention(
                        embed_dim=config.embedding_dim,
                        num_heads=config.num_attention_heads,
                    ),
                ) for _ in range(config.num_encoder_layers)
            ]

        self.encoder = (SentenceEncoder(transformer=Transformer(
            vocab_size=config.vocab_size,
            embedding_dim=config.embedding_dim,
            layers=layers,
            max_seq_len=config.max_seq_len,
        )) if not self.use_selfie_encoder else PostEncoder(
            transformer=SELFIETransformer(
                vocab_size=config.vocab_size,
                embedding_dim=config.embedding_dim,
                layers=layers,
                max_seq_len=config.max_seq_len,
            )))
        self.apply(init_params)
        if config.model_path:
            with PathManager.open(config.model_path, "rb") as f:
                roberta_state = torch.load(f,
                                           map_location=lambda s, l:
                                           default_restore_location(s, "cpu"))
            # If the model has previously been loaded in PyText and fine-tuned,
            # we don't need the special state dict translation; load it
            # directly.
            if not config.is_finetuned:
                self.encoder.load_roberta_state_dict(roberta_state["model"])
            else:
                self.load_state_dict(roberta_state)

        if config.use_bias_finetuning:
            for (n, p) in self.encoder.named_parameters():
                # "encoder.transformer.layers.0.attention.input_projection.weight" -> false
                # "encoder.transformer.layers.0.attention.input_projection.bias" -> true
                if n.split(".")[-1] != "bias":
                    p.requires_grad_(False)

        self.export_encoder = config.export_encoder
        self.variable_size_embedding = config.variable_size_embedding
        self.use_linformer_encoder = config.use_linformer_encoder
        log_class_usage(__class__)
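
The use_bias_finetuning branch in Example #10 freezes every encoder parameter whose name does not end in "bias". The same pattern in isolation, applied to a stand-in nn.Sequential model rather than the PyText encoder:

    import torch.nn as nn

    model = nn.Sequential(nn.Linear(16, 16), nn.ReLU(), nn.Linear(16, 4))
    for name, param in model.named_parameters():
        # Keep only bias terms trainable, e.g. "0.bias" and "2.bias".
        if name.split(".")[-1] != "bias":
            param.requires_grad_(False)

    trainable = [n for n, p in model.named_parameters() if p.requires_grad]
    print(trainable)  # ['0.bias', '2.bias']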
Example #11
    def _small_encoder(self):
        layers = [TransformerLayer(embedding_dim=12) for _ in range(2)]
        transformer = Transformer(vocab_size=100,
                                  embedding_dim=12,
                                  layers=layers)
        return SentenceEncoder(transformer)