def __init__(self, hparams=None):
    ModuleBase.__init__(self, hparams)

    hidden_dim = self._hparams.hidden_dim
    ffn_inner_dim = self._hparams.ffn_inner_dim
    dropout = self._hparams.dropout
    activation = self._hparams.activation
    if activation == 'gelu':
        # 'gelu' is not a standard TF1 activation name; resolve it to
        # Texar's implementation.
        activation = layers.gelu

    with tf.variable_scope(self.variable_scope):
        tf.get_variable_scope().set_initializer(
            layers.get_initializer(self._hparams.initializer))

        # First dense projection: hidden_dim -> ffn_inner_dim
        l1_hparams = {
            "type": "Dense",
            "kwargs": {
                "units": ffn_inner_dim,
                "activation": activation
            }
        }
        self.linear1 = layers.get_layer(hparams=l1_hparams)

        dropout_hparams = {
            "type": "Dropout",
            "kwargs": {"rate": dropout}
        }
        self.dropout = layers.get_layer(hparams=dropout_hparams)

        # Second dense projection: ffn_inner_dim -> hidden_dim
        l2_hparams = {
            "type": "Dense",
            "kwargs": {"units": hidden_dim}
        }
        self.linear2 = layers.get_layer(hparams=l2_hparams)
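# A minimal usage sketch for the module above, assuming the enclosing
# `PositionWiseFF` class is in scope and texar-tf/TF1.x are available.
# The hparam values are illustrative, not library defaults:
ff = PositionWiseFF(hparams={
    "hidden_dim": 768,       # width restored by `linear2`
    "ffn_inner_dim": 3072,   # width expanded to by `linear1`
    "dropout": 0.1,
    "activation": "gelu",
})
# The constructor only wires linear1 -> dropout -> linear2; how they are
# applied to inputs is defined in `_build`, which is not shown here.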
def test_get_layer(self):
    """Tests :func:`texar.tf.core.layers.get_layer`.
    """
    hparams = {"type": "Conv1D"}
    layer = layers.get_layer(hparams)
    self.assertTrue(isinstance(layer, tf.layers.Conv1D))

    hparams = {
        "type": "MergeLayer",
        "kwargs": {
            "layers": [
                {"type": "Conv1D"},
                {"type": "Conv1D"}
            ]
        }
    }
    layer = layers.get_layer(hparams)
    self.assertTrue(isinstance(layer, tx.core.MergeLayer))

    hparams = {"type": tf.layers.Conv1D}
    layer = layers.get_layer(hparams)
    self.assertTrue(isinstance(layer, tf.layers.Conv1D))

    hparams = {"type": tf.layers.Conv1D(filters=10, kernel_size=2)}
    layer = layers.get_layer(hparams)
    self.assertTrue(isinstance(layer, tf.layers.Conv1D))
def __init__(self, r_r_bias, r_w_bias, r_s_bias=None, segment_embed=None,
             hparams=None):
    ModuleBase.__init__(self, hparams=hparams)

    self.num_heads = self._hparams.num_heads
    self.head_dim = self._hparams.head_dim
    hidden_dim = self._hparams.hidden_dim

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        # The official implementation creates these head variables.
        # Creating dense layers instead would cause a dimension mismatch
        # when loading the pretrained tensors.
        # TODO(avinash): Can we reshape tensors while loading the ckpt?
        self.q_head = tf.get_variable(
            'q/kernel', [hidden_dim, self.num_heads, self.head_dim])

        self.k_head = tf.get_variable(
            'k/kernel', [hidden_dim, self.num_heads, self.head_dim])

        self.v_head = tf.get_variable(
            'v/kernel', [hidden_dim, self.num_heads, self.head_dim])

        self.k_head_r = tf.get_variable(
            'r/kernel', [hidden_dim, self.num_heads, self.head_dim])

        self.dropout = layers.get_layer(hparams={
            "type": "Dropout",
            "kwargs": {"rate": self._hparams.dropout}
        })

        self.dropout_attn = layers.get_layer(hparams={
            "type": "Dropout",
            "kwargs": {"rate": self._hparams.attention_dropout}
        })

        self.output_projection = tf.get_variable(
            'o/kernel', [hidden_dim, self.num_heads, self.head_dim])

        self.r_r_bias = r_r_bias
        self.r_w_bias = r_w_bias

        if self._hparams.use_segments:
            self.segment_embed = segment_embed
            self.r_s_bias = r_s_bias

        self.scale = 1 / (self.head_dim ** 0.5)
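# A hypothetical construction sketch for the module above. The relative
# attention biases must be shaped [num_heads, head_dim] to match the head
# kernels created in `__init__`; all values below are illustrative:
import tensorflow as tf

num_heads, head_dim = 12, 64
r_r_bias = tf.get_variable('r_r_bias', [num_heads, head_dim])
r_w_bias = tf.get_variable('r_w_bias', [num_heads, head_dim])
attn = RelativeMutiheadAttention(
    r_r_bias, r_w_bias,
    hparams={
        "num_heads": num_heads,
        "head_dim": head_dim,
        "hidden_dim": num_heads * head_dim,
        "dropout": 0.1,
        "attention_dropout": 0.1,
        "use_segments": False,
    })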
def __init__(self, pretrained_model_name=None, cache_dir=None, hparams=None):
    super(XLNetClassifier, self).__init__(hparams=hparams)

    with tf.variable_scope(self.variable_scope):
        tf.get_variable_scope().set_initializer(
            get_initializer(self._hparams.initializer))

        # Creates the underlying encoder
        encoder_hparams = dict_fetch(hparams, XLNetEncoder.default_hparams())
        if encoder_hparams is not None:
            encoder_hparams['name'] = "encoder"
        self._encoder = XLNetEncoder(
            pretrained_model_name=pretrained_model_name,
            cache_dir=cache_dir,
            hparams=encoder_hparams)

        if self._hparams.use_projection:
            self.projection = get_layer(hparams={
                "type": "Dense",
                "kwargs": {"units": self._encoder.output_size}
            })

        # Creates a dropout layer
        drop_kwargs = {"rate": self._hparams.dropout}
        layer_hparams = {"type": "Dropout", "kwargs": drop_kwargs}
        self._dropout_layer = get_layer(hparams=layer_hparams)

        # Creates an additional classification layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes <= 0:
            self._logit_layer = None
        else:
            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()
            logit_kwargs.update({"units": self._num_classes})
            if 'name' not in logit_kwargs:
                logit_kwargs['name'] = "logit_layer"

            layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
            self._logit_layer = get_layer(hparams=layer_hparams)
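# Usage sketch; "xlnet-base-cased" is assumed to be one of the supported
# pretrained checkpoint names, and the hparams are illustrative:
clf = XLNetClassifier(
    pretrained_model_name="xlnet-base-cased",
    hparams={"num_classes": 2, "dropout": 0.1})
# With `num_classes <= 0`, no logit layer is created
# (`self._logit_layer` stays None).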
def _build_layers(network, layers=None, layer_hparams=None):
    """Builds layers.

    Either :attr:`layer_hparams` or :attr:`layers` must be provided. If both
    are given, :attr:`layers` will be used.

    Args:
        network: An instance of a subclass of
            :class:`~texar.tf.modules.networks.network_base.FeedForwardNetworkBase`.
        layers (optional): A list of layer instances.
        layer_hparams (optional): A list of layer hparams, each of which is
            fed to :func:`~texar.tf.core.layers.get_layer` to create the
            layer instance.
    """
    with tf.variable_scope(network.variable_scope):
        if layers is not None:
            network._layers = layers
        else:
            if layer_hparams is None:
                raise ValueError(
                    'Either `layers` or `layer_hparams` is required.')
            network._layers = []
            for hparams in layer_hparams:
                network._layers.append(get_layer(hparams=hparams))

    for layer in network._layers:
        layer_name = uniquify_str(layer.name, network._layer_names)
        network._layer_names.append(layer_name)
        network._layers_by_name[layer_name] = layer
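# This helper is normally invoked from a network's constructor. The
# equivalent user-facing configuration, with illustrative layer specs,
# looks like this:
import texar.tf as tx

net = tx.modules.FeedForwardNetwork(hparams={
    "layers": [
        {"type": "Dense", "kwargs": {"units": 256, "activation": "relu"}},
        {"type": "Dropout", "kwargs": {"rate": 0.1}},
        {"type": "Dense", "kwargs": {"units": 10}},
    ]
})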
def __init__(self, pretrained_model_name=None, cache_dir=None, hparams=None):
    super(BERTEncoder, self).__init__(hparams=hparams)
    self.load_pretrained_config(pretrained_model_name, cache_dir)

    with tf.variable_scope(self.variable_scope):
        # Word embedding
        self.word_embedder = WordEmbedder(
            vocab_size=self._hparams.vocab_size,
            hparams=self._hparams.embed)

        # Segment embedding for each token type
        self.segment_embedder = WordEmbedder(
            vocab_size=self._hparams.type_vocab_size,
            hparams=self._hparams.segment_embed)

        # Position embedding
        self.position_embedder = PositionEmbedder(
            position_size=self._hparams.position_size,
            hparams=self._hparams.position_embed)

        # The BERT encoder (a TransformerEncoder)
        self.encoder = TransformerEncoder(hparams=self._hparams.encoder)

        with tf.variable_scope("pooler"):
            kwargs_i = {
                "units": self._hparams.hidden_size,
                "activation": tf.tanh
            }
            layer_hparams = {"type": "Dense", "kwargs": kwargs_i}
            self.pooler = get_layer(hparams=layer_hparams)
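# Usage sketch; "bert-base-uncased" is assumed to be one of the supported
# checkpoint names:
enc = BERTEncoder(pretrained_model_name="bert-base-uncased")
# The Dense+tanh `pooler` built above is applied to the encoder output in
# `_build`, which is not shown here.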
def __init__(self, pretrained_model_name=None, cache_dir=None, hparams=None):
    super().__init__(hparams=hparams)

    with tf.variable_scope(self.variable_scope):
        # Creates the underlying encoder
        encoder_hparams = dict_fetch(hparams, GPT2Encoder.default_hparams())
        if encoder_hparams is not None:
            encoder_hparams['name'] = None
        self._encoder = GPT2Encoder(
            pretrained_model_name=pretrained_model_name,
            cache_dir=cache_dir,
            hparams=encoder_hparams)

        # Creates a dropout layer
        drop_kwargs = {"rate": self._hparams.dropout}
        layer_hparams = {"type": "Dropout", "kwargs": drop_kwargs}
        self._dropout_layer = get_layer(hparams=layer_hparams)

        # Creates an additional classification layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes <= 0:
            self._logit_layer = None
        else:
            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()
            logit_kwargs.update({"units": self._num_classes})
            if 'name' not in logit_kwargs:
                logit_kwargs['name'] = "logit_layer"

            layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
            self._logit_layer = get_layer(hparams=layer_hparams)
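# Usage sketch with illustrative hparams; omitting `pretrained_model_name`
# lets the underlying encoder fall back to its default checkpoint:
clf = GPT2Classifier(hparams={"num_classes": 2, "dropout": 0.1})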
def append_layer(self, layer):
    """Appends a layer to the end of the network.

    The method is only feasible before :attr:`_build` is called.

    Args:
        layer: A :tf_main:`tf.layers.Layer <layers/Layer>` instance, or
            a dict of layer hyperparameters.
    """
    if self._built:
        raise TexarError("`FeedForwardNetwork.append_layer` can be "
                         "called only before `_build` is called.")

    with tf.variable_scope(self.variable_scope):
        layer_ = layer
        if not isinstance(layer_, tf.layers.Layer):
            layer_ = get_layer(hparams=layer_)
        self._layers.append(layer_)
        layer_name = uniquify_str(layer_.name, self._layer_names)
        self._layer_names.append(layer_name)
        self._layers_by_name[layer_name] = layer_
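# Usage sketch: layers can be appended either as instances or as hparam
# dicts understood by `get_layer` (the network construction is illustrative):
net = FeedForwardNetwork()
net.append_layer(tf.layers.Dense(units=256, activation=tf.nn.relu))
net.append_layer({"type": "Dropout", "kwargs": {"rate": 0.1}})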
def __init__(self, pretrained_model_name=None, cache_dir=None, hparams=None):
    PretrainedBase.__init__(self, pretrained_model_name, cache_dir, hparams)

    if self.pretrained_model_dir:
        self._hparams = HParams(self.pretrained_model_hparams,
                                self._hparams.todict())

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        # Word embedding
        self.word_embedder = WordEmbedder(
            vocab_size=self._hparams.vocab_size,
            hparams=self._hparams.embed)

        # Segment embedding for each token type
        self.segment_embedder = WordEmbedder(
            vocab_size=self._hparams.type_vocab_size,
            hparams=self._hparams.segment_embed)

        # Position embedding
        self.position_embedder = PositionEmbedder(
            position_size=self._hparams.position_size,
            hparams=self._hparams.position_embed)

        # The BERT encoder (a TransformerEncoder)
        self.encoder = TransformerEncoder(hparams=self._hparams.encoder)

        with tf.variable_scope("pooler"):
            kwargs_i = {
                "units": self._hparams.hidden_size,
                "activation": tf.tanh
            }
            layer_hparams = {"type": "Dense", "kwargs": kwargs_i}
            self.pooler = layers.get_layer(hparams=layer_hparams)
def __init__(self, cell=None, cell_dropout_mode=None, output_layer=None,
             hparams=None):
    ClassifierBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        # Creates the underlying encoder
        encoder_hparams = utils.dict_fetch(
            hparams, UnidirectionalRNNEncoder.default_hparams())
        if encoder_hparams is not None:
            encoder_hparams['name'] = None
        self._encoder = UnidirectionalRNNEncoder(
            cell=cell,
            cell_dropout_mode=cell_dropout_mode,
            output_layer=output_layer,
            hparams=encoder_hparams)

        # Creates an additional classification layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes <= 0:
            self._logit_layer = None
        else:
            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()
            logit_kwargs.update({"units": self._num_classes})
            if 'name' not in logit_kwargs:
                logit_kwargs['name'] = "logit_layer"

            layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
            self._logit_layer = layers.get_layer(hparams=layer_hparams)
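# Usage sketch with illustrative hparams; `cell` and `output_layer` may be
# passed explicitly or configured through the encoder hparams:
clf = UnidirectionalRNNClassifier(hparams={"num_classes": 2})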
def _build_dense_output_layer(hparams):
    nlayers = hparams.num_layers

    if nlayers <= 0:
        return None

    layer_size = _to_list(
        hparams.layer_size, 'output_layer.layer_size', nlayers)

    other_kwargs = hparams.other_dense_kwargs or {}
    if isinstance(other_kwargs, HParams):
        other_kwargs = other_kwargs.todict()
    if not isinstance(other_kwargs, dict):
        raise ValueError(
            "hparams 'output_layer.other_dense_kwargs' must be a dict.")

    dense_layers = []
    for i in range(nlayers):
        if i == nlayers - 1:
            activation = hparams.final_layer_activation
        else:
            activation = hparams.activation

        kwargs_i = {
            "units": layer_size[i],
            "activation": activation,
            "name": "dense_%d" % (i + 1)
        }
        kwargs_i.update(other_kwargs)

        layer_hparams = {"type": "Dense", "kwargs": kwargs_i}
        dense_layers.append(layers.get_layer(hparams=layer_hparams))

    if len(dense_layers) == 1:
        dense_layers = dense_layers[0]

    return dense_layers
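# A sketch of the `output_layer` hparams this helper consumes (values
# illustrative). `layer_size` may be a single int or a per-layer list,
# as suggested by the `_to_list` call above:
output_layer_hparams = {
    "num_layers": 2,
    "layer_size": [256, 128],
    "activation": "relu",
    "final_layer_activation": None,
    "other_dense_kwargs": None,
}
# Wrapped in an `HParams` instance, this would yield a list of two Dense
# layers; a single layer is returned unwrapped when num_layers == 1.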
def __init__(self, pretrained_model_name=None, cache_dir=None, hparams=None):
    super(XLNetEncoder, self).__init__(hparams=hparams)
    self.load_pretrained_config(pretrained_model_name, cache_dir)

    num_layers = self._hparams.num_layers
    use_segments = self._hparams.use_segments
    untie_r = self._hparams.untie_r

    with tf.variable_scope(self.variable_scope):
        if untie_r:
            self.r_w_bias = tf.get_variable(
                'r_w_bias',
                [num_layers, self._hparams.num_heads, self._hparams.head_dim],
                dtype=tf.float32)
            self.r_r_bias = tf.get_variable(
                'r_r_bias',
                [num_layers, self._hparams.num_heads, self._hparams.head_dim],
                dtype=tf.float32)
        else:
            self.r_w_bias = tf.get_variable(
                'r_w_bias',
                [self._hparams.num_heads, self._hparams.head_dim],
                dtype=tf.float32)
            self.r_r_bias = tf.get_variable(
                'r_r_bias',
                [self._hparams.num_heads, self._hparams.head_dim],
                dtype=tf.float32)

        if use_segments:
            self.segment_embed = tf.get_variable(
                'seg_embed',
                [num_layers, 2, self._hparams.num_heads,
                 self._hparams.head_dim],
                dtype=tf.float32)
            self.r_s_bias = (
                tf.get_variable(
                    'r_s_bias',
                    [num_layers, self._hparams.num_heads,
                     self._hparams.head_dim],
                    dtype=tf.float32)
                if untie_r else
                tf.get_variable(
                    'r_s_bias',
                    [self._hparams.num_heads, self._hparams.head_dim],
                    dtype=tf.float32))
        else:
            self.segment_embed = None
            self.r_s_bias = None

        # Word embedding
        self.word_embedder = WordEmbedder(
            vocab_size=self._hparams.vocab_size,
            hparams={"dim": self._hparams.hidden_dim})

        # Position embedding
        self.pos_embed = RelativePositionalEncoding(
            hparams={
                "dim": self._hparams.hidden_dim,
                "max_seq_len": self._hparams.max_seq_len
            })

        self.attn_layers = []
        self.ff_layers = []
        rel_attn_hparams = dict_fetch(
            self._hparams, RelativeMutiheadAttention.default_hparams())
        rel_attn_hparams["name"] = "rel_attn"
        ff_hparams = dict_fetch(
            self._hparams, PositionWiseFF.default_hparams())
        ff_hparams["name"] = "ff"

        for i in range(num_layers):
            with tf.variable_scope("layer_{}".format(i)):
                if self._hparams.untie_r:
                    if use_segments:
                        self.attn_layers.append(RelativeMutiheadAttention(
                            self.r_r_bias[i], self.r_w_bias[i],
                            self.r_s_bias[i], self.segment_embed[i],
                            hparams=rel_attn_hparams))
                    else:
                        self.attn_layers.append(RelativeMutiheadAttention(
                            self.r_r_bias[i], self.r_w_bias[i],
                            hparams=rel_attn_hparams))
                else:
                    if use_segments:
                        self.attn_layers.append(RelativeMutiheadAttention(
                            self.r_r_bias, self.r_w_bias,
                            self.r_s_bias, self.segment_embed[i],
                            hparams=rel_attn_hparams))
                    else:
                        self.attn_layers.append(RelativeMutiheadAttention(
                            self.r_r_bias, self.r_w_bias,
                            hparams=rel_attn_hparams))
                self.ff_layers.append(PositionWiseFF(hparams=ff_hparams))

        dropout_hparams = {
            "type": "Dropout",
            "kwargs": {"rate": self._hparams.dropout}
        }
        self.dropout = get_layer(hparams=dropout_hparams)

        self.mask_embed = tf.get_variable(
            'mask_emb', [1, 1, self.hparams.hidden_dim], dtype=tf.float32)
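# Usage sketch; "xlnet-base-cased" is assumed to be a supported checkpoint
# name:
xlnet = XLNetEncoder(pretrained_model_name="xlnet-base-cased")
# With `untie_r=True`, the relative-attention biases are created with a
# leading `num_layers` axis and sliced per layer in the loop above.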