Example #1
    def __init__(self, hparams=None):
        ModuleBase.__init__(self, hparams)

        hidden_dim = self._hparams.hidden_dim
        ffn_inner_dim = self._hparams.ffn_inner_dim
        dropout = self._hparams.dropout
        activation = self._hparams.activation
        if activation == 'gelu':
            activation = layers.gelu

        with tf.variable_scope(self.variable_scope):
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))
            l1_hparams = {
                "type": "Dense",
                "kwargs": {
                    "units": ffn_inner_dim,
                    "activation": activation
                }
            }
            self.linear1 = layers.get_layer(hparams=l1_hparams)
            dropout_hparams = {
                "type": "Dropout",
                "kwargs": {
                    "rate": dropout
                }
            }
            self.dropout = layers.get_layer(hparams=dropout_hparams)
            l2_hparams = {
                "type": "Dense",
                "kwargs": {
                    "units": hidden_dim
                }
            }
            self.linear2 = layers.get_layer(hparams=l2_hparams)
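
The constructor above builds its whole Dense -> Dropout -> Dense stack from hparams dictionaries through Texar-TF's layer factory. Below is a minimal standalone sketch of that same pattern, assuming the layers module is importable as texar.tf.core.layers (as the snippet's own calls suggest); the unit counts and dropout rate are illustrative, not taken from any particular config.

# Hedged sketch: the hparams-dict-driven layer factory used above.
# Dimensions here are made up for illustration only.
from texar.tf.core import layers

linear1 = layers.get_layer(hparams={
    "type": "Dense",
    "kwargs": {"units": 3072, "activation": layers.gelu},
})
dropout = layers.get_layer(hparams={
    "type": "Dropout",
    "kwargs": {"rate": 0.1},
})
linear2 = layers.get_layer(hparams={
    "type": "Dense",
    "kwargs": {"units": 768},
})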
Example #2
    def __init__(self, hparams=None):
        EncoderBase.__init__(self, hparams)

        with tf.variable_scope(self.variable_scope):
            if self._hparams.initializer:
                tf.get_variable_scope().set_initializer(
                    layers.get_initializer(self._hparams.initializer))

            self.multihead_attention_list = []
            self.poswise_networks = []
            for i in range(self._hparams.num_blocks):
                with tf.variable_scope("layer_{}".format(i)):

                    with tf.variable_scope('attention'):
                        mh_attn = MultiheadAttentionEncoder(
                            self._hparams.multihead_attention)
                        self.multihead_attention_list.append(mh_attn)

                        if self._hparams.dim != mh_attn.hparams.output_dim:
                            raise ValueError(
                                'The "dim" in the hparams of '
                                '"multihead_attention" should be equal to the '
                                '"dim" of TransformerEncoder')

                    pw_net = FeedForwardNetwork(
                        hparams=self._hparams['poswise_feedforward'])
                    final_dim = pw_net.hparams.layers[-1]['kwargs']['units']
                    if self._hparams.dim != final_dim:
                        raise ValueError(
                            'The output dimension of '
                            '"poswise_feedforward" should be equal '
                            'to the "dim" of TransformerEncoder.')
                    self.poswise_networks.append(pw_net)
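
This second constructor is recognizably Texar-TF's TransformerEncoder: one MultiheadAttentionEncoder plus one FeedForwardNetwork per block, with dimension checks against the encoder's "dim". A hedged usage sketch follows; the embedder, vocabulary size, and dimensions are assumptions, and the call follows the encoder's _build(inputs, sequence_length) interface.

# Hedged usage sketch (TF-1.x graph mode; shapes and sizes are assumptions).
import tensorflow as tf
from texar.tf.modules import TransformerEncoder, WordEmbedder

embedder = WordEmbedder(vocab_size=30000, hparams={"dim": 512})
encoder = TransformerEncoder(hparams={"dim": 512, "num_blocks": 6})

token_ids = tf.placeholder(tf.int64, shape=[None, None])  # [batch, time]
lengths = tf.placeholder(tf.int32, shape=[None])          # [batch]

# Per-position encodings of shape [batch, time, dim]. Positional embeddings
# and input scaling are omitted and assumed to be handled by the caller.
outputs = encoder(inputs=embedder(token_ids), sequence_length=lengths)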
Example #3
    def __init__(self, vocab_size=None, output_layer=None, hparams=None):
        ModuleBase.__init__(self, hparams)

        with tf.variable_scope(self.variable_scope):
            if self._hparams.initializer:
                tf.get_variable_scope().set_initializer(
                    layers.get_initializer(self._hparams.initializer))

            # Make the output layer
            self._output_layer, self._vocab_size = _make_output_layer(
                output_layer, vocab_size, self._hparams.output_layer_bias,
                self.variable_scope)

            # Make attention and poswise networks
            self.multihead_attentions = {'self_att': [], 'encdec_att': []}
            self.poswise_networks = []
            for i in range(self._hparams.num_blocks):
                layer_name = 'layer_{}'.format(i)
                with tf.variable_scope(layer_name):
                    with tf.variable_scope("self_attention"):
                        multihead_attention = MultiheadAttentionEncoder(
                            self._hparams.multihead_attention)
                        self.multihead_attentions['self_att'].append(
                            multihead_attention)

                    if self._hparams.dim != \
                            multihead_attention.hparams.output_dim:
                        raise ValueError('The output dimension of '
                                         'MultiheadAttentionEncoder should be '
                                         'equal to the dim of '
                                         'TransformerDecoder')

                    with tf.variable_scope('encdec_attention'):
                        multihead_attention = MultiheadAttentionEncoder(
                            self._hparams.multihead_attention)
                        self.multihead_attentions['encdec_att'].append(
                            multihead_attention)

                    if self._hparams.dim != \
                            multihead_attention.hparams.output_dim:
                        raise ValueError('The output dimension of '
                                         'MultiheadAttentionEncoder should be '
                                         'equal to the dim of '
                                         'TransformerDecoder')

                    pw_net = FeedForwardNetwork(
                        hparams=self._hparams['poswise_feedforward'])
                    final_dim = pw_net.hparams.layers[-1]['kwargs']['units']
                    if self._hparams.dim != final_dim:
                        raise ValueError(
                            'The output dimension of '
                            '"poswise_feedforward" should be equal '
                            'to the "dim" of TransformerDecoder.')
                    self.poswise_networks.append(pw_net)

            # Built in _build()
            self.context = None
            self.context_sequence_length = None
            self.embedding = None
            self._helper = None
            self._cache = None
            self.max_decoding_length = None
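
This third constructor is the matching TransformerDecoder, which additionally owns the output (logit) layer and both the self-attention and encoder-decoder attention stacks. Below is a hedged sketch of training-time (teacher-forcing) usage; the encoder outputs, source lengths, and target embeddings are stand-ins for whatever the surrounding model provides, and the keyword names follow the decoder's documented _build interface.

# Hedged sketch: teacher-forcing decode at training time.
import tensorflow as tf
from texar.tf.modules import TransformerDecoder

decoder = TransformerDecoder(vocab_size=30000, hparams={"dim": 512})

# Stand-ins for tensors produced elsewhere in the model.
enc_outputs = tf.placeholder(tf.float32, shape=[None, None, 512])    # [batch, src_time, dim]
src_lengths = tf.placeholder(tf.int32, shape=[None])                 # [batch]
target_embeds = tf.placeholder(tf.float32, shape=[None, None, 512])  # [batch, tgt_time, dim]

outputs = decoder(
    memory=enc_outputs,
    memory_sequence_length=src_lengths,
    inputs=target_embeds,
    decoding_strategy='train_greedy')
logits = outputs.logits  # [batch, tgt_time, vocab_size]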
Example #4
    def __init__(self,
                 r_r_bias,
                 r_w_bias,
                 r_s_bias=None,
                 segment_embed=None,
                 hparams=None):
        ModuleBase.__init__(self, hparams=hparams)

        self.num_heads = self._hparams.num_heads
        self.head_dim = self._hparams.head_dim
        hidden_dim = self._hparams.hidden_dim

        with tf.variable_scope(self.variable_scope):
            if self._hparams.initializer:
                tf.get_variable_scope().set_initializer(
                    layers.get_initializer(self._hparams.initializer))

            # The official implementation creates these head variables directly.
            # If we created dense layers instead, there would be a dimension
            # mismatch when loading the checkpoint tensors.
            # TODO(avinash): Can we reshape tensors while loading the ckpt?
            self.q_head = tf.get_variable(
                'q/kernel', [hidden_dim, self.num_heads, self.head_dim])

            self.k_head = tf.get_variable(
                'k/kernel', [hidden_dim, self.num_heads, self.head_dim])

            self.v_head = tf.get_variable(
                'v/kernel', [hidden_dim, self.num_heads, self.head_dim])

            self.k_head_r = tf.get_variable(
                'r/kernel', [hidden_dim, self.num_heads, self.head_dim])

            self.dropout = layers.get_layer(hparams={
                "type": "Dropout",
                "kwargs": {
                    "rate": self._hparams.dropout
                }
            })

            self.dropout_attn = layers.get_layer(
                hparams={
                    "type": "Dropout",
                    "kwargs": {
                        "rate": self._hparams.attention_dropout
                    }
                })

            self.output_projection = tf.get_variable(
                'o/kernel', [hidden_dim, self.num_heads, self.head_dim])

            self.r_r_bias = r_r_bias
            self.r_w_bias = r_w_bias

            if self._hparams.use_segments:
                self.segment_embed = segment_embed
                self.r_s_bias = r_s_bias

            self.scale = 1 / (self.head_dim**0.5)
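
The relative multi-head attention module above never creates its r_r_bias/r_w_bias (or r_s_bias/segment_embed) parameters itself; the caller owns them and passes them in, which is exactly what the XLNet encoder in Example #11 does per layer. A minimal hedged sketch of that contract follows, with illustrative shapes; the class name is taken from Example #11, and the import line is omitted because it depends on the module layout.

# Hedged sketch: caller-created bias variables handed to the attention layer.
import tensorflow as tf

num_heads, head_dim = 8, 64
r_w_bias = tf.get_variable('r_w_bias', [num_heads, head_dim], dtype=tf.float32)
r_r_bias = tf.get_variable('r_r_bias', [num_heads, head_dim], dtype=tf.float32)

# RelativeMutiheadAttention: the class whose constructor is shown above,
# instantiated per layer in Example #11.
attn = RelativeMutiheadAttention(
    r_r_bias, r_w_bias,
    hparams={"num_heads": num_heads, "head_dim": head_dim})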
Example #5
    def __init__(self,
                 pretrained_model_name=None,
                 cache_dir=None,
                 hparams=None):
        super(XLNetClassifier, self).__init__(hparams=hparams)

        with tf.variable_scope(self.variable_scope):
            tf.get_variable_scope().set_initializer(
                get_initializer(self._hparams.initializer))
            # Creates the underlying encoder
            encoder_hparams = dict_fetch(hparams,
                                         XLNetEncoder.default_hparams())
            if encoder_hparams is not None:
                encoder_hparams['name'] = "encoder"
            self._encoder = XLNetEncoder(
                pretrained_model_name=pretrained_model_name,
                cache_dir=cache_dir,
                hparams=encoder_hparams)
            if self._hparams.use_projection:
                self.projection = get_layer(
                    hparams={
                        "type": "Dense",
                        "kwargs": {
                            "units": self._encoder.output_size
                        }
                    })

            # Creates a dropout layer
            drop_kwargs = {"rate": self._hparams.dropout}
            layer_hparams = {"type": "Dropout", "kwargs": drop_kwargs}
            self._dropout_layer = get_layer(hparams=layer_hparams)

            # Creates an additional classification layer if needed
            self._num_classes = self._hparams.num_classes
            if self._num_classes <= 0:
                self._logit_layer = None
            else:
                logit_kwargs = self._hparams.logit_layer_kwargs
                if logit_kwargs is None:
                    logit_kwargs = {}
                elif not isinstance(logit_kwargs, HParams):
                    raise ValueError(
                        "hparams['logit_layer_kwargs'] must be a dict.")
                else:
                    logit_kwargs = logit_kwargs.todict()
                logit_kwargs.update({"units": self._num_classes})
                if 'name' not in logit_kwargs:
                    logit_kwargs['name'] = "logit_layer"

                layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
                self._logit_layer = get_layer(hparams=layer_hparams)
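
A hedged usage sketch for the classifier constructor above: the pretrained model name, placeholder shape, and num_classes value are assumptions, and per Texar-TF's classifier interface the call is expected to return logits together with predicted class ids.

# Hedged sketch: sentence classification with a pretrained XLNet backbone.
import tensorflow as tf
from texar.tf.modules import XLNetClassifier

clf = XLNetClassifier(pretrained_model_name="xlnet-base-cased",
                      hparams={"num_classes": 2})

token_ids = tf.placeholder(tf.int32, shape=[None, None])  # assumed [batch, time]
logits, preds = clf(token_ids)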
Example #6
def get_embedding(hparams=None,
                  init_value=None,
                  num_embeds=None,
                  variable_scope='Embedding'):
    r"""Creates embedding variable if not exists.

    Args:
        hparams (dict or HParams, optional): Embedding hyperparameters. Missing
            hyperparameters are set to default values. See
            :func:`~texar.tf.modules.default_embedding_hparams`
            for all hyperparameters and default values.

            If :attr:`init_value` is given, :attr:`hparams["initializer"]`
            and :attr:`hparams["dim"]` are ignored.
        init_value (Tensor or numpy array, optional): Initial values of the
            embedding variable. If not given, embedding is initialized as
            specified in :attr:`hparams["initializer"]`.
        num_embeds (int, optional): The number of embedding items
            (e.g., vocabulary size). Required if :attr:`init_value` is
            not provided.
        variable_scope (str or VariableScope, optional): Variable scope of
            the embedding variable.

    Returns:
        Variable or Tensor: A 2D `Variable` or `Tensor` of the same shape as
        :attr:`init_value`, or of shape ``[num_embeds, hparams["dim"]]``.
    """
    with tf.variable_scope(variable_scope):
        if hparams is None or isinstance(hparams, dict):
            hparams = HParams(hparams, default_embedding_hparams())
        regularizer = layers.get_regularizer(hparams["regularizer"])
        if init_value is None:
            initializer = layers.get_initializer(hparams["initializer"])
            dim = hparams["dim"]
            if not isinstance(hparams["dim"], (list, tuple)):
                dim = [dim]
            embedding = tf.get_variable(name='w',
                                        shape=[num_embeds] + dim,
                                        initializer=initializer,
                                        regularizer=regularizer,
                                        trainable=hparams["trainable"])
        else:
            init_value = tf.cast(init_value, tf.float32)
            embedding = tf.get_variable(name='w',
                                        initializer=init_value,
                                        regularizer=regularizer,
                                        trainable=hparams["trainable"])

        return embedding
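
Two hedged invocation sketches for get_embedding, matching the two paths the docstring describes: one creates a fresh [num_embeds, dim] table from hparams, the other adopts pre-trained values (in which case the hparams initializer and dim are ignored). Sizes and scope names are illustrative.

# Hedged sketch: the two ways of creating the embedding table.
import numpy as np

# (a) fresh table, sized and initialized from hparams
word_embedding = get_embedding(
    hparams={"dim": 300}, num_embeds=10000, variable_scope='word_embed')

# (b) table seeded from an existing matrix; hparams["initializer"] and
#     hparams["dim"] are ignored, per the docstring above
pretrained = np.random.randn(10000, 300).astype(np.float32)
pretrained_embedding = get_embedding(
    init_value=pretrained, variable_scope='pretrained_embed')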
Example #7
    def __init__(self,
                 vocab_size=None,
                 output_layer=None,
                 tau=None,
                 hparams=None):
        EncoderBase.__init__(self, hparams)

        with tf.variable_scope(self.variable_scope):
            if self._hparams.initializer:
                tf.get_variable_scope().set_initializer(
                    layers.get_initializer(self._hparams.initializer))

            # Make the output layer
            self._output_layer, self._vocab_size = _make_output_layer(
                output_layer, vocab_size, self._hparams.output_layer_bias,
                self.variable_scope)

            # Make attention and poswise networks
            self.graph_multihead_attention_list = []
            self.poswise_networks = []
            for i in range(self._hparams.num_blocks):
                with tf.variable_scope("layer_{}".format(i)):

                    with tf.variable_scope('attention'):
                        mh_attn = GraphMultiheadAttentionEncoder(
                            self._hparams.graph_multihead_attention)
                        self.graph_multihead_attention_list.append(mh_attn)

                        if self._hparams.dim != mh_attn.hparams.output_dim:
                            raise ValueError(
                                'The "dim" in the hparams of '
                                '"multihead_attention" should be equal to the '
                                '"dim" of CrossGraphTransformerFixedLengthDecoder'
                            )

                    pw_net = FeedForwardNetwork(
                        hparams=self._hparams['poswise_feedforward'])
                    final_dim = pw_net.hparams.layers[-1]['kwargs']['units']
                    if self._hparams.dim != final_dim:
                        raise ValueError(
                            'The output dimension of '
                            '"poswise_feedforward" should be equal '
                            'to the "dim" of CrossGraphTransformerFixedLengthDecoder.'
                        )
                    self.poswise_networks.append(pw_net)

            self._helper = None
            self._tau = tau
Example #8
    def __init__(self, hparams=None):
        EncoderBase.__init__(self, hparams)
        use_bias = self._hparams.use_bias

        with tf.variable_scope(self.variable_scope):
            if self._hparams.initializer:
                tf.get_variable_scope().set_initializer(
                    layers.get_initializer(self._hparams.initializer))

            self.Q_dense = tf.layers.Dense(self._hparams.num_units,
                                           use_bias=use_bias,
                                           name='query')
            self.K_dense = tf.layers.Dense(self._hparams.num_units,
                                           use_bias=use_bias,
                                           name='key')
            self.V_dense = tf.layers.Dense(self._hparams.num_units,
                                           use_bias=use_bias,
                                           name='value')
            self.O_dense = tf.layers.Dense(self._hparams.output_dim,
                                           use_bias=use_bias,
                                           name='output')
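
The four projections above are sized purely by num_units, output_dim, and use_bias from the hparams. Here is a hedged instantiation sketch, assuming this is the MultiheadAttentionEncoder used by the Transformer examples earlier; the values are chosen so output_dim matches a 512-dim Transformer stack, which is exactly the check those constructors perform.

# Hedged sketch: hparams consistent with a Transformer whose "dim" is 512.
from texar.tf.modules import MultiheadAttentionEncoder

mh_attn = MultiheadAttentionEncoder(hparams={
    "num_units": 512,   # width of the query/key/value projections
    "output_dim": 512,  # must equal the enclosing Transformer's "dim"
    "use_bias": False,
})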
Example #9
    def __init__(self,
                 pretrained_model_name=None,
                 cache_dir=None,
                 hparams=None):
        PretrainedBase.__init__(self, pretrained_model_name, cache_dir,
                                hparams)
        if self.pretrained_model_dir:
            self._hparams = HParams(self.pretrained_model_hparams,
                                    self._hparams.todict())

        with tf.variable_scope(self.variable_scope):
            if self._hparams.initializer:
                tf.get_variable_scope().set_initializer(
                    layers.get_initializer(self._hparams.initializer))

            # Word embedding
            self.word_embedder = WordEmbedder(
                vocab_size=self._hparams.vocab_size,
                hparams=self._hparams.embed)

            # Segment embedding for each type of tokens
            self.segment_embedder = WordEmbedder(
                vocab_size=self._hparams.type_vocab_size,
                hparams=self._hparams.segment_embed)

            # Position embedding
            self.position_embedder = PositionEmbedder(
                position_size=self._hparams.position_size,
                hparams=self._hparams.position_embed)

            # The BERT encoder (a TransformerEncoder)
            self.encoder = TransformerEncoder(hparams=self._hparams.encoder)

            with tf.variable_scope("pooler"):
                kwargs_i = {
                    "units": self._hparams.hidden_size,
                    "activation": tf.tanh
                }
                layer_hparams = {"type": "Dense", "kwargs": kwargs_i}
                self.pooler = layers.get_layer(hparams=layer_hparams)
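
This constructor assembles a BERT-style encoder: word, segment, and position embedders feeding a TransformerEncoder, topped by a tanh pooler. A hedged usage sketch follows, assuming Texar-TF's BertEncoder class name, a standard pretrained checkpoint, and the encoder's (outputs, pooled_output) return convention.

# Hedged sketch: encode token ids with a pretrained BERT encoder.
import tensorflow as tf
from texar.tf.modules import BertEncoder

encoder = BertEncoder(pretrained_model_name="bert-base-uncased")

input_ids = tf.placeholder(tf.int64, shape=[None, None])  # [batch, time]
lengths = tf.placeholder(tf.int32, shape=[None])          # [batch]

# outputs: per-token states [batch, time, hidden]; pooled: sentence summary
outputs, pooled = encoder(input_ids, sequence_length=lengths)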
Example #10
    def reset_parameters(self):
        with tf.variable_scope(self.variable_scope):
            if self._hparams.initializer:
                tf.get_variable_scope().set_initializer(
                    get_initializer(self._hparams.initializer))
Example #11
    def __init__(self,
                 pretrained_model_name=None,
                 cache_dir=None,
                 hparams=None):
        PretrainedBase.__init__(self, pretrained_model_name, cache_dir,
                                hparams)

        if self.pretrained_model_dir:
            self._hparams = HParams(self.pretrained_model_hparams,
                                    self._hparams.todict())

        num_layers = self._hparams.num_layers
        use_segments = self._hparams.use_segments
        untie_r = self._hparams.untie_r

        with tf.variable_scope(self.variable_scope):

            if self._hparams.initializer:
                tf.get_variable_scope().set_initializer(
                    layers.get_initializer(self._hparams.initializer))

            if untie_r:
                self.r_w_bias = tf.get_variable(
                    'r_w_bias',
                    [num_layers, self._hparams.num_heads,
                     self._hparams.head_dim],
                    dtype=tf.float32)
                self.r_r_bias = tf.get_variable(
                    'r_r_bias',
                    [num_layers, self._hparams.num_heads,
                     self._hparams.head_dim],
                    dtype=tf.float32)
            else:
                self.r_w_bias = tf.get_variable(
                    'r_w_bias',
                    [self._hparams.num_heads, self._hparams.head_dim],
                    dtype=tf.float32)
                self.r_r_bias = tf.get_variable(
                    'r_r_bias',
                    [self._hparams.num_heads, self._hparams.head_dim],
                    dtype=tf.float32)

            if use_segments:
                self.segment_embed = tf.get_variable(
                    'seg_embed',
                    [num_layers, 2, self._hparams.num_heads,
                     self._hparams.head_dim],
                    dtype=tf.float32)
                if untie_r:
                    self.r_s_bias = tf.get_variable(
                        'r_s_bias',
                        [num_layers, self._hparams.num_heads,
                         self._hparams.head_dim],
                        dtype=tf.float32)
                else:
                    self.r_s_bias = tf.get_variable(
                        'r_s_bias',
                        [self._hparams.num_heads, self._hparams.head_dim],
                        dtype=tf.float32)
            else:
                self.segment_embed = None
                self.r_s_bias = None

            # Word embedding
            self.word_embedder = WordEmbedder(
                vocab_size=self._hparams.vocab_size,
                hparams={"dim": self._hparams.hidden_dim})

            # Position embedding
            self.pos_embed = RelativePositionalEncoding(
                hparams={
                    "dim": self._hparams.hidden_dim,
                    "max_seq_len": self._hparams.max_seq_len
                })

            self.attn_layers = []
            self.ff_layers = []
            rel_attn_hparams = dict_fetch(
                self._hparams, RelativeMutiheadAttention.default_hparams())
            rel_attn_hparams["name"] = "rel_attn"

            ff_hparams = dict_fetch(self._hparams,
                                    PositionWiseFF.default_hparams())
            ff_hparams["name"] = "ff"

            for i in range(num_layers):
                with tf.variable_scope("layer_{}".format(i)):
                    if self._hparams.untie_r:
                        if use_segments:
                            self.attn_layers.append(
                                RelativeMutiheadAttention(
                                    self.r_r_bias[i],
                                    self.r_w_bias[i],
                                    self.r_s_bias[i],
                                    self.segment_embed[i],
                                    hparams=rel_attn_hparams))
                        else:
                            self.attn_layers.append(
                                RelativeMutiheadAttention(
                                    self.r_r_bias[i],
                                    self.r_w_bias[i],
                                    hparams=rel_attn_hparams))
                    else:
                        if use_segments:
                            self.attn_layers.append(
                                RelativeMutiheadAttention(
                                    self.r_r_bias,
                                    self.r_w_bias,
                                    self.r_s_bias,
                                    self.segment_embed[i],
                                    hparams=rel_attn_hparams))
                        else:
                            self.attn_layers.append(
                                RelativeMutiheadAttention(
                                    self.r_r_bias,
                                    self.r_w_bias,
                                    hparams=rel_attn_hparams))
                    self.ff_layers.append(PositionWiseFF(hparams=ff_hparams))

            dropout_hparams = {
                "type": "Dropout",
                "kwargs": {
                    "rate": self._hparams.dropout
                }
            }
            self.dropout = layers.get_layer(hparams=dropout_hparams)

            self.mask_embed = tf.get_variable('mask_emb',
                                              [1, 1, self.hparams.hidden_dim],
                                              dtype=tf.float32)
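
Finally, a hedged usage sketch for the XLNetEncoder constructor above. The model name and placeholder shape are assumptions; the call is expected to return the per-position hidden states together with the updated memory used for XLNet's segment-level recurrence.

# Hedged sketch: run the encoder over a batch of token ids.
import tensorflow as tf
from texar.tf.modules import XLNetEncoder

xlnet = XLNetEncoder(pretrained_model_name="xlnet-base-cased")

token_ids = tf.placeholder(tf.int32, shape=[None, None])  # assumed [batch, time]
output, new_memory = xlnet(token_ids)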