def __init__(self, vocab_size=None, output_layer=None, hparams=None):
    ModuleBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        # Make the output layer
        self._output_layer, self._vocab_size = _make_output_layer(
            output_layer, vocab_size, self._hparams.output_layer_bias,
            self.variable_scope)

        # Make attention and poswise networks
        self.multihead_attentions = {'self_att': [], 'encdec_att': []}
        self.poswise_networks = []
        for i in range(self._hparams.num_blocks):
            layer_name = 'layer_{}'.format(i)
            with tf.variable_scope(layer_name):
                with tf.variable_scope("self_attention"):
                    multihead_attention = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attentions['self_att'].append(
                        multihead_attention)

                if self._hparams.dim != \
                        multihead_attention.hparams.output_dim:
                    raise ValueError('The output dimension of '
                                     'MultiheadAttentionEncoder should be '
                                     'equal to the dim of '
                                     'TransformerDecoder')

                with tf.variable_scope('encdec_attention'):
                    multihead_attention = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attentions['encdec_att'].append(
                        multihead_attention)

                if self._hparams.dim != \
                        multihead_attention.hparams.output_dim:
                    raise ValueError('The output dimension of '
                                     'MultiheadAttentionEncoder should be '
                                     'equal to the dim of '
                                     'TransformerDecoder')

                pw_net = FeedForwardNetwork(
                    hparams=self._hparams['poswise_feedforward'])
                final_dim = pw_net.hparams.layers[-1]['kwargs']['units']
                if self._hparams.dim != final_dim:
                    raise ValueError(
                        'The output dimension of "poswise_feedforward" '
                        'should be equal to the "dim" of '
                        'TransformerDecoder.')
                self.poswise_networks.append(pw_net)

    # Fields populated in _build()
    self.context = None
    self.context_sequence_length = None
    self.embedding = None
    self._helper = None
    self._cache = None
    self.max_decoding_length = None
def __init__(self, hparams=None):
    ModuleBase.__init__(self, hparams)

    hidden_dim = self._hparams.hidden_dim
    ffn_inner_dim = self._hparams.ffn_inner_dim
    dropout = self._hparams.dropout
    activation = self._hparams.activation
    if activation == 'gelu':
        activation = layers.gelu

    with tf.variable_scope(self.variable_scope):
        tf.get_variable_scope().set_initializer(
            layers.get_initializer(self._hparams.initializer))

        l1_hparams = {
            "type": "Dense",
            "kwargs": {
                "units": ffn_inner_dim,
                "activation": activation
            }
        }
        self.linear1 = layers.get_layer(hparams=l1_hparams)

        dropout_hparams = {
            "type": "Dropout",
            "kwargs": {
                "rate": dropout
            }
        }
        self.dropout = layers.get_layer(hparams=dropout_hparams)

        l2_hparams = {
            "type": "Dense",
            "kwargs": {
                "units": hidden_dim
            }
        }
        self.linear2 = layers.get_layer(hparams=l2_hparams)
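
# A minimal NumPy sketch (an assumption for illustration, not the module's
# actual forward pass) of how the three layers built above are typically
# composed in a position-wise feed-forward block: project up to
# ffn_inner_dim with an activation, apply dropout, project back to
# hidden_dim. Shapes and the GELU activation are assumptions.
import numpy as np

def gelu(x):
    # Tanh approximation of GELU, commonly used in Transformer FFNs.
    return 0.5 * x * (1.0 + np.tanh(
        np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))

def position_wise_ffn(x, w1, b1, w2, b2, dropout_mask=None):
    """x: [batch, time, hidden_dim]; w1: [hidden_dim, ffn_inner_dim];
    w2: [ffn_inner_dim, hidden_dim]."""
    h = gelu(x @ w1 + b1)            # linear1 + activation
    if dropout_mask is not None:     # dropout (training only)
        h = h * dropout_mask
    return h @ w2 + b2               # linear2 projects back to hidden_dim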
def __init__(self, hparams=None):
    ModuleBase.__init__(self, hparams)

    self.dense_layers = [
        Dense(self._hparams.input_dim,
              activation='relu',
              use_bias=True,
              kernel_regularizer=l2(self._hparams.l2_reg))
    ]

    if self._hparams.l2_reg == 0.0:
        self.regularizer = None
    else:
        self.regularizer = tf.contrib.layers.l2_regularizer(
            scale=self._hparams.l2_reg)

    self.neigh_weights = tf.get_variable(
        'neigh_weights',
        [self._hparams.input_dim * 2, self._hparams.output_dim],
        initializer=tf.contrib.layers.xavier_initializer(),
        regularizer=self.regularizer,
        trainable=True)

    if self._hparams.use_bias:
        self.bias = tf.get_variable(
            'bias_weight',
            [1, self._hparams.output_dim],
            initializer=tf.constant_initializer(0.0),
            regularizer=self.regularizer,
            trainable=True)
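
# A minimal NumPy sketch (an assumption, not the module's actual _build) of
# how a weight of shape [input_dim * 2, output_dim] is typically used in a
# GraphSAGE-style aggregator: mean-pool the neighbor vectors, concatenate
# them with the node's own vector, then apply the linear projection, bias,
# and a ReLU nonlinearity (the nonlinearity is an assumption here).
import numpy as np

def concat_aggregate(self_vecs, neigh_vecs, neigh_weights, bias=None):
    """self_vecs: [num_nodes, input_dim];
    neigh_vecs: [num_nodes, num_neighbors, input_dim];
    neigh_weights: [input_dim * 2, output_dim]."""
    neigh_means = neigh_vecs.mean(axis=1)                      # pool neighbors
    concat = np.concatenate([self_vecs, neigh_means], axis=1)  # [num_nodes, 2 * input_dim]
    output = concat @ neigh_weights
    if bias is not None:
        output = output + bias
    return np.maximum(output, 0.0)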
def __init__(self, cell=None, vocab_size=None, output_layer=None,
             cell_dropout_mode=None, hparams=None):
    ModuleBase.__init__(self, hparams)

    self._helper = None
    self._initial_state = None

    # Make rnn cell
    with tf.variable_scope(self.variable_scope):
        if cell is not None:
            self._cell = cell
        else:
            self._cell = layers.get_rnn_cell(
                self._hparams.rnn_cell, cell_dropout_mode)
    self._beam_search_cell = None

    # Make the output layer
    self._output_layer, self._vocab_size = _make_output_layer(
        output_layer, vocab_size, self._hparams.output_layer_bias,
        self.variable_scope)

    self.max_decoding_length = None
def __init__(self, pretrained_model_name=None, cache_dir=None, hparams=None):
    ModuleBase.__init__(self, hparams=hparams)

    self.pretrained_model_dir = None

    if self.model_name == "BERT":
        load_func = load_pretrained_bert
        transform_func = transform_bert_to_texar_config
    elif self.model_name == "XLNet":
        load_func = load_pretrained_xlnet
        transform_func = transform_xlnet_to_texar_config
    else:
        raise ValueError("Could not find this pre-trained model.")

    if pretrained_model_name:
        self.pretrained_model_dir = load_func(
            pretrained_model_name, cache_dir)
    elif self._hparams.pretrained_model_name is not None:
        self.pretrained_model_dir = load_func(
            self._hparams.pretrained_model_name, cache_dir)

    if self.pretrained_model_dir:
        self.pretrained_model_hparams = transform_func(
            self.pretrained_model_dir)
def __init__(self, r_r_bias, r_w_bias, r_s_bias=None, segment_embed=None,
             hparams=None):
    ModuleBase.__init__(self, hparams=hparams)

    self.num_heads = self._hparams.num_heads
    self.head_dim = self._hparams.head_dim
    hidden_dim = self._hparams.hidden_dim

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        # The official implementation creates these head variables directly.
        # Creating dense layers instead would cause a dimension mismatch
        # when loading the pre-trained tensors.
        # TODO(avinash): Can we reshape tensors while loading the ckpt?
        self.q_head = tf.get_variable(
            'q/kernel', [hidden_dim, self.num_heads, self.head_dim])

        self.k_head = tf.get_variable(
            'k/kernel', [hidden_dim, self.num_heads, self.head_dim])

        self.v_head = tf.get_variable(
            'v/kernel', [hidden_dim, self.num_heads, self.head_dim])

        self.k_head_r = tf.get_variable(
            'r/kernel', [hidden_dim, self.num_heads, self.head_dim])

        self.dropout = layers.get_layer(hparams={
            "type": "Dropout",
            "kwargs": {"rate": self._hparams.dropout}
        })

        self.dropout_attn = layers.get_layer(hparams={
            "type": "Dropout",
            "kwargs": {"rate": self._hparams.attention_dropout}
        })

        self.output_projection = tf.get_variable(
            'o/kernel', [hidden_dim, self.num_heads, self.head_dim])

        self.r_r_bias = r_r_bias
        self.r_w_bias = r_w_bias

        if self._hparams.use_segments:
            self.segment_embed = segment_embed
            self.r_s_bias = r_s_bias

        self.scale = 1 / (self.head_dim ** 0.5)
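
# A minimal NumPy sketch (an assumption for illustration, not the module's
# actual _build) of how the relative-position biases above enter the
# Transformer-XL-style attention score: the content term uses
# (q + r_w_bias) . k and the position term uses (q + r_r_bias) . k_r, with
# the sum scaled by 1 / sqrt(head_dim).
import numpy as np

def rel_attn_scores(q, k, k_r, r_w_bias, r_r_bias, head_dim):
    """q: [len_q, num_heads, head_dim]; k: [len_k, num_heads, head_dim];
    k_r: [len_pos, num_heads, head_dim] (projected relative positions);
    r_w_bias, r_r_bias: [num_heads, head_dim]."""
    content_score = np.einsum('ind,jnd->nij', q + r_w_bias, k)
    position_score = np.einsum('ind,jnd->nij', q + r_r_bias, k_r)
    # The real model additionally applies a "relative shift" to the position
    # score so index j indexes relative distance; omitted here for brevity.
    return (content_score + position_score) / np.sqrt(head_dim)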
def __init__(self, hparams=None):
    ModuleBase.__init__(self, hparams)

    self._layers = []
    self._layer_names = []
    self._layers_by_name = {}
    self._layer_outputs = []
    self._layer_outputs_by_name = {}
def __init__(self, network=None, network_kwargs=None, hparams=None):
    ModuleBase.__init__(self, hparams=hparams)

    with tf.variable_scope(self.variable_scope):
        self._build_network(network, network_kwargs)
def __init__(self, hparams=None):
    ModuleBase.__init__(self, hparams)

    self.alphas = tf.get_variable(
        'alpha', self._hparams.dim,
        initializer=tf.constant_initializer(0.0),
        trainable=True, dtype=tf.float32)
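
# A minimal NumPy sketch (an assumption, not the module's actual _build) of
# how a learned per-dimension `alpha` is typically used as a parametric
# ReLU: positive inputs pass through unchanged, negative inputs are scaled
# by alpha. With alpha initialized to 0.0 this starts out as a plain ReLU.
import numpy as np

def prelu(x, alphas):
    """x: [..., dim]; alphas: [dim]."""
    pos = np.maximum(x, 0.0)           # identity on the positive part
    neg = alphas * np.minimum(x, 0.0)  # learned slope on the negative part
    return pos + neg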
def __init__(self, hparams=None):
    ModuleBase.__init__(self, hparams)

    with tf.variable_scope(self._hparams.name_scope):
        self.epsilon = self._hparams.epsilon
        self.decay = self._hparams.decay
        size = self._hparams.size

        self.scale = tf.get_variable(
            'scale', [size],
            initializer=tf.constant_initializer(0.1),
            trainable=True)
        self.offset = tf.get_variable('offset', [size], trainable=True)

        self.pop_mean = tf.get_variable(
            'pop_mean', [size],
            initializer=tf.zeros_initializer(),
            trainable=False)
        self.pop_var = tf.get_variable(
            'pop_var', [size],
            initializer=tf.ones_initializer(),
            trainable=False)
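
# A minimal NumPy sketch (an assumption, not the module's actual _build) of
# how the variables above are typically used for batch normalization: at
# inference time the population statistics normalize the input; during
# training the batch statistics are used and the population statistics are
# updated with an exponential moving average controlled by `decay`.
import numpy as np

def batch_norm(x, scale, offset, pop_mean, pop_var, epsilon, decay,
               training=False):
    """x: [batch, size]; scale, offset, pop_mean, pop_var: [size]."""
    if training:
        batch_mean = x.mean(axis=0)
        batch_var = x.var(axis=0)
        # Moving-average update of the population statistics.
        pop_mean[:] = decay * pop_mean + (1.0 - decay) * batch_mean
        pop_var[:] = decay * pop_var + (1.0 - decay) * batch_var
        mean, var = batch_mean, batch_var
    else:
        mean, var = pop_mean, pop_var
    return scale * (x - mean) / np.sqrt(var + epsilon) + offset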
def __init__(self, raw_memory_dim, input_embed_fn=None,
             output_embed_fn=None, query_embed_fn=None, hparams=None):
    ModuleBase.__init__(self, hparams)

    self._raw_memory_dim = raw_memory_dim

    self._n_hops = self._hparams.n_hops
    self._relu_dim = self._hparams.relu_dim
    self._memory_size = self._hparams.memory_size

    with tf.variable_scope(self.variable_scope):
        self._A, self._C, self._B, self._memory_dim = self._build_embed_fn(
            input_embed_fn, output_embed_fn, query_embed_fn)

        self.H = None
        if self.hparams.use_H:
            self.H = tf.get_variable(
                name="H", shape=[self._memory_dim, self._memory_dim])
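
# A minimal NumPy sketch (an assumption, not the module's actual _build) of
# one end-to-end memory-network hop using the pieces created above: A embeds
# the memory for addressing, C embeds it for the output, B embeds the query,
# and the optional H matrix maps the query state between hops.
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def memnet_hops(memory_A, memory_C, u, H=None, n_hops=1):
    """memory_A, memory_C: [memory_size, memory_dim] (A- and C-embedded
    memory); u: [memory_dim] (B-embedded query)."""
    for _ in range(n_hops):
        p = softmax(memory_A @ u)                 # attention over memory slots
        o = memory_C.T @ p                        # weighted sum of output embeddings
        u = (H @ u if H is not None else u) + o   # update query for next hop
    return u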
def __init__(self, hparams=None):
    ModuleBase.__init__(self, hparams)
def __init__(self, num_embeds=None, hparams=None):
    ModuleBase.__init__(self, hparams)

    self._num_embeds = num_embeds
def __init__(self, hparams=None):
    ModuleBase.__init__(self, hparams)

    self.sinusoid_embed = PositionalEmbedding(self._hparams.dim)
def __init__(self, embed_dim):
    ModuleBase.__init__(self)

    freq_seq = tf.range(0.0, embed_dim, 2.0)
    self.inv_freq = 1 / (10000 ** (freq_seq / embed_dim))
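
# A minimal NumPy sketch (an assumption, not the module's actual _build) of
# how `inv_freq` is typically turned into sinusoidal position embeddings:
# take the outer product of positions and inverse frequencies, then
# concatenate the sine and cosine parts to get a [len, embed_dim] matrix.
import numpy as np

def sinusoid_embedding(positions, embed_dim):
    """positions: [len] array of (possibly relative) positions."""
    freq_seq = np.arange(0.0, embed_dim, 2.0)
    inv_freq = 1.0 / (10000 ** (freq_seq / embed_dim))
    sinusoid = np.einsum('i,d->id', positions, inv_freq)  # [len, embed_dim // 2]
    return np.concatenate([np.sin(sinusoid), np.cos(sinusoid)], axis=-1)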
def __init__(self, H=None, hparams=None):
    ModuleBase.__init__(self, hparams)

    self._H = H
def __init__(self, output_size, hparams=None):
    ModuleBase.__init__(self, hparams)

    self._output_size = output_size