    def __init__(self, vocab_size=None, output_layer=None, hparams=None):
        ModuleBase.__init__(self, hparams)

        with tf.variable_scope(self.variable_scope):
            if self._hparams.initializer:
                tf.get_variable_scope().set_initializer(
                    layers.get_initializer(self._hparams.initializer))

            # Make the output layer
            self._output_layer, self._vocab_size = _make_output_layer(
                output_layer, vocab_size, self._hparams.output_layer_bias,
                self.variable_scope)

            # Make attention and poswise networks
            self.multihead_attentions = {'self_att': [], 'encdec_att': []}
            self.poswise_networks = []
            for i in range(self._hparams.num_blocks):
                layer_name = 'layer_{}'.format(i)
                with tf.variable_scope(layer_name):
                    with tf.variable_scope("self_attention"):
                        multihead_attention = MultiheadAttentionEncoder(
                            self._hparams.multihead_attention)
                        self.multihead_attentions['self_att'].append(
                            multihead_attention)

                    if self._hparams.dim != \
                            multihead_attention.hparams.output_dim:
                        raise ValueError('The output dimension of '
                                         'MultiheadAttentionEncoder should be '
                                         'equal to the dim of TransformerDecoder')

                    with tf.variable_scope('encdec_attention'):
                        multihead_attention = MultiheadAttentionEncoder(
                            self._hparams.multihead_attention)
                        self.multihead_attentions['encdec_att'].append(
                            multihead_attention)

                    if self._hparams.dim != \
                            multihead_attention.hparams.output_dim:
                        raise ValueError('The output dimension of '
                                         'MultiheadAttentionEncoder should be '
                                         'equal to the dim of TransformerDecoder')

                    pw_net = FeedForwardNetwork(
                        hparams=self._hparams['poswise_feedforward'])
                    final_dim = pw_net.hparams.layers[-1]['kwargs']['units']
                    if self._hparams.dim != final_dim:
                        raise ValueError(
                            'The output dimension of '
                            '"poswise_feedforward" should be equal '
                            'to the "dim" of TransformerDecoder.')
                    self.poswise_networks.append(pw_net)

            # Built in _build()
            self.context = None
            self.context_sequence_length = None
            self.embedding = None
            self._helper = None
            self._cache = None
            self.max_decoding_length = None
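
For orientation, the standalone sketch below shows how one block's sub-modules built above (self-attention, encoder-decoder attention, and the position-wise network) are typically chained with residual connections. The call signatures and the omission of layer normalization are simplifying assumptions, not the module's actual _build logic.

# Hedged sketch only: `self_att`, `encdec_att`, and `pw_net` stand in for one
# block's sub-modules created above; their call signatures are assumptions,
# and layer normalization is omitted for brevity.
def _decoder_block_sketch(x, memory, self_att, encdec_att, pw_net):
    x = x + self_att(x)            # masked self-attention + residual
    x = x + encdec_att(x, memory)  # attention over encoder outputs + residual
    x = x + pw_net(x)              # position-wise feed-forward + residual
    return x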
Example #2
    def __init__(self, hparams=None):
        ModuleBase.__init__(self, hparams)

        hidden_dim = self._hparams.hidden_dim
        ffn_inner_dim = self._hparams.ffn_inner_dim
        dropout = self._hparams.dropout
        activation = self._hparams.activation
        if activation == 'gelu':
            activation = layers.gelu

        with tf.variable_scope(self.variable_scope):
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))
            l1_hparams = {
                "type": "Dense",
                "kwargs": {
                    "units": ffn_inner_dim,
                    "activation": activation
                }
            }
            self.linear1 = layers.get_layer(hparams=l1_hparams)
            dropout_hparams = {
                "type": "Dropout",
                "kwargs": {
                    "rate": dropout
                }
            }
            self.dropout = layers.get_layer(hparams=dropout_hparams)
            l2_hparams = {
                "type": "Dense",
                "kwargs": {
                    "units": hidden_dim
                }
            }
            self.linear2 = layers.get_layer(hparams=l2_hparams)
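
The three layers built above are typically applied in sequence; a minimal sketch of such a forward pass (the method name and `training` flag are assumptions, not part of the snippet):

    def _forward(self, inputs, training=False):
        # Dense -> Dropout -> Dense, using the layers created in __init__.
        hidden = self.linear1(inputs)
        hidden = self.dropout(hidden, training=training)
        return self.linear2(hidden)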
Example #3
    def __init__(self, hparams=None):
        ModuleBase.__init__(self, hparams)
        self.dense_layers = [
            Dense(self._hparams.input_dim,
                  activation='relu',
                  use_bias=True,
                  kernel_regularizer=l2(self._hparams.l2_reg))
        ]

        if self._hparams.l2_reg == 0.0:
            self.regularizer = None
        else:
            self.regularizer = tf.contrib.layers.l2_regularizer(
                scale=self._hparams.l2_reg)

        self.neigh_weights = tf.get_variable(
            'neigh_weights',
            [self._hparams.input_dim * 2, self._hparams.output_dim],
            initializer=tf.contrib.layers.xavier_initializer(),
            regularizer=self.regularizer,
            trainable=True)  # TODO: check

        if self._hparams.use_bias:
            self.bias = tf.get_variable(
                'bias_weight', [1, self._hparams.output_dim],
                initializer=tf.constant_initializer(0.0),
                regularizer=self.regularizer,
                trainable=True)
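
A rough sketch of how `neigh_weights` and `bias` could aggregate a node with its neighbors (method name and tensor shapes are assumptions; `tf` is assumed imported as in the snippets above):

    def _aggregate_sketch(self, self_vecs, neigh_vecs):
        # Assumed shapes: self_vecs [batch, input_dim],
        # neigh_vecs [batch, num_neighbors, input_dim].
        # Average neighbor features, concatenate with the node's own features
        # (giving 2 * input_dim), then project to output_dim.
        neigh_mean = tf.reduce_mean(neigh_vecs, axis=1)
        concat = tf.concat([self_vecs, neigh_mean], axis=-1)
        output = tf.matmul(concat, self.neigh_weights)
        if self._hparams.use_bias:
            output = output + self.bias
        return tf.nn.relu(output)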
Example #4
    def __init__(self,
                 cell=None,
                 vocab_size=None,
                 output_layer=None,
                 cell_dropout_mode=None,
                 hparams=None):
        ModuleBase.__init__(self, hparams)

        self._helper = None
        self._initial_state = None

        # Make rnn cell
        with tf.variable_scope(self.variable_scope):
            if cell is not None:
                self._cell = cell
            else:
                self._cell = layers.get_rnn_cell(self._hparams.rnn_cell,
                                                 cell_dropout_mode)
        self._beam_search_cell = None

        # Make the output layer
        self._output_layer, self._vocab_size = _make_output_layer(
            output_layer, vocab_size, self._hparams.output_layer_bias,
            self.variable_scope)

        self.max_decoding_length = None
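
For context, a decoder with this constructor is instantiated either with an explicit `cell` or by letting `layers.get_rnn_cell` build one from the hparams; a hypothetical instantiation (the class name and hparams values below are placeholders, not from the snippet):

# Hypothetical usage; `MyRNNDecoder` and the hparams values are examples only.
decoder = MyRNNDecoder(
    vocab_size=10000,
    hparams={"rnn_cell": {"type": "LSTMCell", "kwargs": {"num_units": 512}}})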
Example #5
    def __init__(self,
                 pretrained_model_name=None,
                 cache_dir=None,
                 hparams=None):

        ModuleBase.__init__(self, hparams=hparams)

        self.pretrained_model_dir = None

        if self.model_name == "BERT":
            load_func = load_pretrained_bert
            transform_func = transform_bert_to_texar_config
        elif self.model_name == "XLNet":
            load_func = load_pretrained_xlnet
            transform_func = transform_xlnet_to_texar_config
        else:
            raise ValueError("Could not find this pre-trained model.")

        if pretrained_model_name:
            self.pretrained_model_dir = load_func(pretrained_model_name,
                                                  cache_dir)
        elif self._hparams.pretrained_model_name is not None:
            self.pretrained_model_dir = load_func(
                self._hparams.pretrained_model_name, cache_dir)

        if self.pretrained_model_dir:
            self.pretrained_model_hparams = transform_func(
                self.pretrained_model_dir)
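
A hypothetical instantiation of a module with this constructor (the class name, model name, and cache path below are placeholders, not values from the snippet):

# Hypothetical usage: the pretrained checkpoint is located (or downloaded into
# `cache_dir`) and its config is converted to Texar hparams.
encoder = SomePretrainedModule(
    pretrained_model_name="bert-base-uncased",  # placeholder model name
    cache_dir="/tmp/pretrained_cache")          # placeholder cache directory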
Example #6
    def __init__(self,
                 r_r_bias,
                 r_w_bias,
                 r_s_bias=None,
                 segment_embed=None,
                 hparams=None):
        ModuleBase.__init__(self, hparams=hparams)

        self.num_heads = self._hparams.num_heads
        self.head_dim = self._hparams.head_dim
        hidden_dim = self._hparams.hidden_dim

        with tf.variable_scope(self.variable_scope):
            if self._hparams.initializer:
                tf.get_variable_scope().set_initializer(
                    layers.get_initializer(self._hparams.initializer))

            # The official implementation creates these head variables.
            # If we created dense layers instead, there would be a dimension
            # mismatch when loading the checkpoint tensors.
            # TODO(avinash): Can we reshape tensors while loading the ckpt?
            self.q_head = tf.get_variable(
                'q/kernel', [hidden_dim, self.num_heads, self.head_dim])

            self.k_head = tf.get_variable(
                'k/kernel', [hidden_dim, self.num_heads, self.head_dim])

            self.v_head = tf.get_variable(
                'v/kernel', [hidden_dim, self.num_heads, self.head_dim])

            self.k_head_r = tf.get_variable(
                'r/kernel', [hidden_dim, self.num_heads, self.head_dim])

            self.dropout = layers.get_layer(hparams={
                "type": "Dropout",
                "kwargs": {
                    "rate": self._hparams.dropout
                }
            })

            self.dropout_attn = layers.get_layer(
                hparams={
                    "type": "Dropout",
                    "kwargs": {
                        "rate": self._hparams.attention_dropout
                    }
                })

            self.output_projection = tf.get_variable(
                'o/kernel', [hidden_dim, self.num_heads, self.head_dim])

            self.r_r_bias = r_r_bias
            self.r_w_bias = r_w_bias

            if self._hparams.use_segments:
                self.segment_embed = segment_embed
                self.r_s_bias = r_s_bias

            self.scale = 1 / (self.head_dim**0.5)
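
The head kernels created above are usually applied with einsum contractions; a sketch under the assumption that `inputs` has shape [seq_len, batch, hidden_dim] (as in the XLNet reference code); variable names here are assumptions:

# Sketch only: project inputs into per-head query/key/value tensors.
q = tf.einsum('ibh,hnd->ibnd', inputs, self.q_head)
k = tf.einsum('ibh,hnd->ibnd', inputs, self.k_head)
v = tf.einsum('ibh,hnd->ibnd', inputs, self.v_head)
# Content-based attention scores, scaled by 1 / sqrt(head_dim).
attn_score = tf.einsum('ibnd,jbnd->ijbn', q + self.r_w_bias, k) * self.scale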
Example #7
    def __init__(self, hparams=None):
        ModuleBase.__init__(self, hparams)

        self._layers = []
        self._layer_names = []
        self._layers_by_name = {}
        self._layer_outputs = []
        self._layer_outputs_by_name = {}
Example #8
    def __init__(self,
                 network=None,
                 network_kwargs=None,
                 hparams=None):
        ModuleBase.__init__(self, hparams=hparams)

        with tf.variable_scope(self.variable_scope):
            self._build_network(network, network_kwargs)
Example #9
    def __init__(self, hparams=None):
        ModuleBase.__init__(self, hparams)

        self.alphas = tf.get_variable('alpha',
                                      self._hparams.dim,
                                      initializer=tf.constant_initializer(0.0),
                                      trainable=True,
                                      dtype=tf.float32)
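
The `alpha` variable above is the learnable slope of a PReLU-style activation; a minimal sketch of the corresponding forward pass (the method name is an assumption):

    def _forward(self, x):
        # Positive inputs pass through unchanged; negative inputs are scaled
        # by the learned per-dimension alpha.
        return tf.maximum(0.0, x) + self.alphas * tf.minimum(0.0, x)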
Example #10
    def __init__(self, hparams=None):
        ModuleBase.__init__(self, hparams)

        with tf.variable_scope(self._hparams.name_scope):
            self.epsilon = self._hparams.epsilon
            self.decay = self._hparams.decay
            size = self._hparams.size

            self.scale = tf.get_variable(
                'scale', [size],
                initializer=tf.constant_initializer(0.1),
                trainable=True)
            self.offset = tf.get_variable('offset', [size], trainable=True)
            self.pop_mean = tf.get_variable('pop_mean', [size],
                                            initializer=tf.zeros_initializer(),
                                            trainable=False)
            self.pop_var = tf.get_variable('pop_var', [size],
                                           initializer=tf.ones_initializer(),
                                           trainable=False)
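
These variables implement batch normalization with a moving-average estimate of the population statistics; a hedged sketch of how they are typically used (the method name, the `is_training` flag, and the assumed [batch, size] input shape are not part of the snippet):

    def _forward(self, x, is_training):
        if is_training:
            # Normalize with batch statistics and update the moving averages.
            batch_mean, batch_var = tf.nn.moments(x, axes=[0])
            update_mean = tf.assign(
                self.pop_mean,
                self.pop_mean * self.decay + batch_mean * (1 - self.decay))
            update_var = tf.assign(
                self.pop_var,
                self.pop_var * self.decay + batch_var * (1 - self.decay))
            with tf.control_dependencies([update_mean, update_var]):
                return tf.nn.batch_normalization(
                    x, batch_mean, batch_var, self.offset, self.scale,
                    self.epsilon)
        # At inference time, normalize with the population statistics.
        return tf.nn.batch_normalization(
            x, self.pop_mean, self.pop_var, self.offset, self.scale,
            self.epsilon)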
Example #11
    def __init__(self,
                 raw_memory_dim,
                 input_embed_fn=None,
                 output_embed_fn=None,
                 query_embed_fn=None,
                 hparams=None):
        ModuleBase.__init__(self, hparams)

        self._raw_memory_dim = raw_memory_dim

        self._n_hops = self._hparams.n_hops
        self._relu_dim = self._hparams.relu_dim
        self._memory_size = self._hparams.memory_size

        with tf.variable_scope(self.variable_scope):
            self._A, self._C, self._B, self._memory_dim = self._build_embed_fn(
                input_embed_fn, output_embed_fn, query_embed_fn)

            self.H = None
            if self.hparams.use_H:
                self.H = tf.get_variable(
                    name="H", shape=[self._memory_dim, self._memory_dim])
Example #12
    def __init__(self, hparams=None):
        ModuleBase.__init__(self, hparams)
Example #13
    def __init__(self, num_embeds=None, hparams=None):
        ModuleBase.__init__(self, hparams)

        self._num_embeds = num_embeds
Example #14
    def __init__(self, hparams=None):
        ModuleBase.__init__(self, hparams)
        self.sinusoid_embed = PositionalEmbedding(self._hparams.dim)
Example #15
    def __init__(self, embed_dim):
        ModuleBase.__init__(self)
        freq_seq = tf.range(0.0, embed_dim, 2.0)
        self.inv_freq = 1 / (10000 ** (freq_seq / embed_dim))
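
The inverse frequencies computed above feed a standard sinusoid embedding; a minimal sketch of the corresponding forward pass (the method name is an assumption), where `pos_seq` is a 1-D tensor of positions:

    def _forward(self, pos_seq):
        # Outer product of positions and inverse frequencies, then
        # concatenated sine/cosine features of size `embed_dim`.
        sinusoid = tf.einsum('i,d->id', pos_seq, self.inv_freq)
        return tf.concat([tf.sin(sinusoid), tf.cos(sinusoid)], axis=-1)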
Example #16
    def __init__(self, H=None, hparams=None):
        ModuleBase.__init__(self, hparams)

        self._H = H
Example #17
    def __init__(self, output_size, hparams=None):
        ModuleBase.__init__(self, hparams)
        self._output_size = output_size