Code example #1
    def test_position_embedder_trainable(self):
        """Tests freezing the embedding parameters.
        """
        pos_size = 100

        embedder = PositionEmbedder(position_size=pos_size,
                                    hparams={"trainable": False})
        self.assertEqual(len(embedder.trainable_variables), 0)

        embedder = PositionEmbedder(position_size=pos_size)
        self.assertEqual(len(embedder.trainable_variables), 1)
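
The test above counts texar-pytorch's `trainable_variables`; a common reason to set `"trainable": False` is to keep a frozen table out of the optimizer. A minimal sketch under that assumption (the vocabulary size and optimizer settings below are illustrative, not taken from these examples):

import torch
from texar.torch.modules import PositionEmbedder, WordEmbedder

# Freeze the position table but train the word embeddings; building the
# optimizer from `trainable_variables` means the frozen table is skipped.
pos_embedder = PositionEmbedder(position_size=100, hparams={"trainable": False})
word_embedder = WordEmbedder(vocab_size=30000)

params = list(word_embedder.trainable_variables) + list(pos_embedder.trainable_variables)
print(len(params))  # 1 -- only the word-embedding table remains trainable
optimizer = torch.optim.Adam(params, lr=1e-3)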
Code example #2
    def __init__(self,
                 pretrained_model_name: Optional[str] = None,
                 cache_dir: Optional[str] = None,
                 hparams=None):
        super().__init__(hparams=hparams)

        self.load_pretrained_config(pretrained_model_name, cache_dir)

        # Word embedding
        self.word_embedder = WordEmbedder(vocab_size=self._hparams.vocab_size,
                                          hparams=self._hparams.embed)

        # Segment embedding for each type of tokens
        self.segment_embedder = None
        if self._hparams.get('type_vocab_size', 0) > 0:
            self.segment_embedder = WordEmbedder(
                vocab_size=self._hparams.type_vocab_size,
                hparams=self._hparams.segment_embed)

        # Position embedding
        self.position_embedder = PositionEmbedder(
            position_size=self._hparams.position_size,
            hparams=self._hparams.position_embed)

        # The BERT encoder (a TransformerEncoder)
        self.encoder = TransformerEncoder(hparams=self._hparams.encoder)

        self.pooler = nn.Sequential(
            nn.Linear(self._hparams.hidden_size, self._hparams.hidden_size),
            nn.Tanh())

        self.init_pretrained_weights()
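
The constructor above only wires the submodules together. As a rough sketch of how they are typically combined (word, position, and optional segment embeddings summed, fed to the TransformerEncoder, with the pooler applied to the first time step), the helper below is an illustrative assumption about the forward pass, not the library's exact implementation; `bert_encode` and its arguments are made up for this sketch:

import torch

def bert_encode(model, inputs, sequence_length=None):
    # `model` is assumed to expose the submodules built in the constructor above.
    batch_size, max_time = inputs.size()

    # Sum word, position, and (optionally) segment embeddings.
    positions = torch.arange(max_time, dtype=torch.long,
                             device=inputs.device).expand(batch_size, -1)
    embeds = model.word_embedder(inputs) + model.position_embedder(positions)
    if model.segment_embedder is not None:
        segment_ids = torch.zeros_like(inputs)  # single-segment input
        embeds = embeds + model.segment_embedder(segment_ids)

    # Run the TransformerEncoder, then pool the first ([CLS]) time step.
    if sequence_length is None:
        sequence_length = inputs.new_full((batch_size,), max_time)
    outputs = model.encoder(embeds, sequence_length=sequence_length)
    pooled = model.pooler(outputs[:, 0, :])
    return outputs, pooled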
Code example #3
    def _test_position_embedder(self, hparams):
        """Tests :class:`texar.torch.modules.PositionEmbedder`.
        """
        pos_size = 100
        embedder = PositionEmbedder(
            position_size=pos_size, hparams=hparams)
        inputs = torch.randint(embedder.num_embeds, (64, 16), dtype=torch.long)
        outputs = embedder(inputs)

        if isinstance(embedder.dim, (list, tuple)):
            emb_dim = tuple(embedder.dim)
        else:
            emb_dim = (embedder.dim,)

        if isinstance(hparams["dim"], (list, tuple)):
            hparams_dim = tuple(hparams["dim"])
        else:
            hparams_dim = (hparams["dim"],)

        self.assertEqual(outputs.size(), (64, 16) + emb_dim)
        self.assertEqual(outputs.size(-1), embedder.output_size)
        self.assertEqual(emb_dim, hparams_dim)
        self.assertEqual(embedder.position_size, 100)
        seq_length = torch.empty(64).uniform_(pos_size).long()
        outputs = embedder(sequence_length=seq_length)
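
For reference, a concrete instantiation of what this test exercises; the `dim` value and the tensor shapes below are illustrative:

import torch
from texar.torch.modules import PositionEmbedder

embedder = PositionEmbedder(position_size=100, hparams={"dim": 16})

# Look-up by explicit position indices: [batch, time] -> [batch, time, dim].
positions = torch.randint(100, (8, 20), dtype=torch.long)
print(embedder(positions).size())  # torch.Size([8, 20, 16])

# Look-up by sequence length: positions 0 .. max(lengths)-1 are embedded,
# giving a [batch, max(lengths), dim] output.
lengths = torch.tensor([5, 20, 12, 7, 20, 3, 9, 16])
print(embedder(sequence_length=lengths).size())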
Code example #4
File: gpt2_encoder.py  Project: zlyx26/texar-pytorch
    def __init__(self,
                 pretrained_model_name: Optional[str] = None,
                 cache_dir: Optional[str] = None,
                 hparams=None):
        super().__init__(hparams=hparams)

        self.load_pretrained_config(pretrained_model_name, cache_dir)

        # Word embedding
        self.word_embedder = WordEmbedder(vocab_size=self._hparams.vocab_size,
                                          hparams=self._hparams.embed)

        # Position embedding
        self.position_embedder = PositionEmbedder(
            position_size=self._hparams.position_size,
            hparams=self._hparams.position_embed)

        # The GPT2 encoder (a TransformerEncoder)
        self.encoder = TransformerEncoder(hparams=self._hparams.encoder)

        self.init_pretrained_weights(load_output_layer=False)
Code example #5
    def __init__(self, hparams=None):
        super().__init__(hparams=hparams)

        # Segment embedding for each type of tokens
        self.segment_embedder = None
        if self._hparams.get('type_vocab_size', 0) > 0:
            self.segment_embedder = WordEmbedder(
                vocab_size=self._hparams.type_vocab_size,
                hparams=self._hparams.segment_embed)

        # Position embedding
        self.position_embedder = PositionEmbedder(
            position_size=self._hparams.position_size,
            hparams=self._hparams.position_embed)

        # The BERT encoder (a TransformerEncoder)
        self.encoder = TransformerEncoder(hparams=self._hparams.encoder)

        self.pooler = nn.Sequential(
            nn.Linear(self._hparams.encoder.dim, self._hparams.hidden_size),
            nn.Tanh())
Code example #6
    def __init__(self,
                 pretrained_model_name: Optional[str] = None,
                 cache_dir: Optional[str] = None,
                 hparams=None):
        self.load_pretrained_config(pretrained_model_name, cache_dir, hparams)

        # Word embedding
        word_embedder = WordEmbedder(vocab_size=self._hparams.vocab_size,
                                     hparams=self._hparams.embed)

        # Position embedding
        position_embedder = PositionEmbedder(
            position_size=self._hparams.position_size,
            hparams=self._hparams.position_embed)

        # The GPT2 encoder (a TransformerEncoder)
        super().__init__(hparams=None)

        # Register modules after `__init__` is called.
        self.word_embedder = word_embedder
        self.position_embedder = position_embedder

        self.init_pretrained_weights(load_output_layer=False)
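
In this older variant the GPT-2 encoder is itself a TransformerEncoder subclass, so the embedders are built before super().__init__ and attached afterwards; calling load_pretrained_config first is what makes self._hparams usable at that point. A hedged usage sketch, assuming the class is exposed as texar.torch.modules.GPT2Encoder, that constructing it with no arguments loads the default pretrained GPT-2 weights, and that it accepts token ids plus a sequence length:

import torch
from texar.torch.modules import GPT2Encoder  # name assumed from the file path above

encoder = GPT2Encoder()  # assumed to download/load the default pretrained GPT-2
ids = torch.randint(encoder.word_embedder.vocab_size, (2, 10), dtype=torch.long)
lengths = torch.tensor([10, 7])
outputs = encoder(ids, sequence_length=lengths)  # [2, 10, hidden_size]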