def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        self.multihead_attention_list = []
        self.poswise_networks = []
        for i in range(self._hparams.num_blocks):
            with tf.variable_scope("layer_{}".format(i)):
                with tf.variable_scope('attention'):
                    mh_attn = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attention_list.append(mh_attn)

                if self._hparams.dim != mh_attn.hparams.output_dim:
                    raise ValueError(
                        'The "dim" in the hparams of '
                        '"multihead_attention" should be equal to the '
                        '"dim" of TransformerEncoder')

                pw_net = FeedForwardNetwork(
                    hparams=self._hparams['poswise_feedforward'])
                final_dim = pw_net.hparams.layers[-1]['kwargs']['units']
                if self._hparams.dim != final_dim:
                    raise ValueError(
                        'The output dimension of '
                        '"poswise_feedforward" should be equal '
                        'to the "dim" of TransformerEncoder.')
                self.poswise_networks.append(pw_net)
def __init__(self, encoder_major=None, encoder_minor=None, hparams=None):
    EncoderBase.__init__(self, hparams)

    encoder_major_hparams = utils.get_instance_kwargs(
        None, self._hparams.encoder_major_hparams)
    encoder_minor_hparams = utils.get_instance_kwargs(
        None, self._hparams.encoder_minor_hparams)

    if encoder_major is not None:
        self._encoder_major = encoder_major
    else:
        with tf.variable_scope(self.variable_scope.name):
            with tf.variable_scope('encoder_major'):
                self._encoder_major = utils.check_or_get_instance(
                    self._hparams.encoder_major_type,
                    encoder_major_hparams,
                    ['texar.modules.encoders', 'texar.custom'])

    if encoder_minor is not None:
        self._encoder_minor = encoder_minor
    elif self._hparams.config_share:
        with tf.variable_scope(self.variable_scope.name):
            with tf.variable_scope('encoder_minor'):
                self._encoder_minor = utils.check_or_get_instance(
                    self._hparams.encoder_major_type,
                    encoder_major_hparams,
                    ['texar.modules.encoders', 'texar.custom'])
    else:
        with tf.variable_scope(self.variable_scope.name):
            with tf.variable_scope('encoder_minor'):
                self._encoder_minor = utils.check_or_get_instance(
                    self._hparams.encoder_minor_type,
                    encoder_minor_hparams,
                    ['texar.modules.encoders', 'texar.custom'])
def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))
        self.position_embedder = SinusoidsPositionEmbedder(
            self._hparams.position_embedder_hparams)
def __init__(self, vocab_size=None, output_layer=None, tau=None,
             hparams=None):
    EncoderBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        # Make the output layer
        self._output_layer, self._vocab_size = _make_output_layer(
            output_layer, vocab_size, self._hparams.output_layer_bias,
            self.variable_scope)

        # Make attention and poswise networks
        self.graph_multihead_attention_list = []
        self.poswise_networks = []
        for i in range(self._hparams.num_blocks):
            with tf.variable_scope("layer_{}".format(i)):
                with tf.variable_scope('attention'):
                    mh_attn = GraphMultiheadAttentionEncoder(
                        self._hparams.graph_multihead_attention)
                    self.graph_multihead_attention_list.append(mh_attn)

                if self._hparams.dim != mh_attn.hparams.output_dim:
                    raise ValueError(
                        'The "dim" in the hparams of '
                        '"multihead_attention" should be equal to the '
                        '"dim" of CrossGraphTransformerFixedLengthDecoder')

                pw_net = FeedForwardNetwork(
                    hparams=self._hparams['poswise_feedforward'])
                final_dim = pw_net.hparams.layers[-1]['kwargs']['units']
                if self._hparams.dim != final_dim:
                    raise ValueError(
                        'The output dimension of '
                        '"poswise_feedforward" should be equal '
                        'to the "dim" of '
                        'CrossGraphTransformerFixedLengthDecoder.')
                self.poswise_networks.append(pw_net)

        self._helper = None
        self._tau = tau
def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        if self._hparams.position_embedder_type == 'sinusoids':
            self.position_embedder = SinusoidsPositionEmbedder(
                self._hparams.position_embedder_hparams)
        else:
            self.position_embedder = PositionEmbedder(
                position_size=self._hparams.position_size,
                hparams=self._hparams.position_embedder_hparams)

        # pylint: disable=protected-access
        if self._hparams.dim != self.position_embedder._hparams.dim:
            raise ValueError(
                '"dim" in TransformerEncoder hparams must be equal '
                'to "dim" in its position_embedder_hparams.')

        self.multihead_attention_list = []
        self.poswise_networks = []
        for i in range(self._hparams.num_blocks):
            with tf.variable_scope("layer_{}".format(i)):
                with tf.variable_scope('attention'):
                    multihead_attention = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attention_list.append(
                        multihead_attention)

                # pylint: disable=protected-access
                if self._hparams.dim != \
                        multihead_attention._hparams.output_dim:
                    raise ValueError(
                        'The "dim" in the hparams of '
                        'multihead_attention should be equal '
                        'to the "dim" of TransformerEncoder')

                poswise_network = FeedForwardNetwork(
                    hparams=self._hparams['poswise_feedforward'])
                # pylint: disable=protected-access
                if self._hparams.dim != \
                        poswise_network._hparams.layers[-1]['kwargs']['units']:
                    raise ValueError(
                        'The "units" in the "kwargs" of '
                        'FeedForwardNetwork should be equal '
                        'to the "dim" of TransformerEncoder')
                self.poswise_networks.append(poswise_network)
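# Illustrative hparams sketch (not from the original source): the ValueError
# checks in the constructor above require "dim", multihead_attention's
# "output_dim", and the "units" of the last "poswise_feedforward" layer to
# agree. A consistent partial configuration could look like the following;
# the "Dense" layer type, the "activation" key, and the import path are
# assumptions based on Texar's documented conventions, and unspecified keys
# fall back to the class defaults.

from texar.modules import TransformerEncoder  # import path assumed

tf_hparams = {
    "dim": 512,
    "num_blocks": 6,
    "multihead_attention": {"num_units": 512, "output_dim": 512},
    "poswise_feedforward": {
        "layers": [
            {"type": "Dense",
             "kwargs": {"units": 2048, "activation": "relu"}},
            {"type": "Dense",
             "kwargs": {"units": 512}},  # last "units" must equal "dim"
        ]
    },
}
encoder = TransformerEncoder(hparams=tf_hparams)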
def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))
        self.Q_dense = tf.layers.Dense(self._hparams.num_units,
                                       use_bias=False, name='q')
        self.K_dense = tf.layers.Dense(self._hparams.num_units,
                                       use_bias=False, name='k')
        self.V_dense = tf.layers.Dense(self._hparams.num_units,
                                       use_bias=False, name='v')
        self.O_dense = tf.layers.Dense(self._hparams.output_dim,
                                       use_bias=False, name='o')
def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)
    use_bias = self._hparams.use_bias

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))
        self.Q_dense = tf.layers.Dense(self._hparams.num_units,
                                       use_bias=use_bias, name='query')
        self.K_dense = tf.layers.Dense(self._hparams.num_units,
                                       use_bias=use_bias, name='key')
        self.V_dense = tf.layers.Dense(self._hparams.num_units,
                                       use_bias=use_bias, name='value')
        self.O_dense = tf.layers.Dense(self._hparams.output_dim,
                                       use_bias=use_bias, name='output')
def __init__(self, embedding, vocab_size=None, hparams=None):
    EncoderBase.__init__(self, hparams)
    self._vocab_size = vocab_size
    self._embedding = None
    self.enc = None

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))
        if self._hparams.position_embedder.name == 'sinusoids':
            self.position_embedder = \
                position_embedders.SinusoidsPositionEmbedder(
                    self._hparams.position_embedder.hparams)

    if self._hparams.use_embedding:
        if isinstance(embedding, tf.Variable):
            self._embedding = embedding
        embed_dim = self._embedding.get_shape().as_list()[-1]
        if self._hparams.zero_pad:  # TODO(zhiting): vocab has zero pad
            if not self._hparams.bos_pad:
                self._embedding = tf.concat(
                    (tf.zeros(shape=[1, embed_dim]),
                     self._embedding[1:, :]), 0)
            else:
                self._embedding = tf.concat(
                    (tf.zeros(shape=[2, embed_dim]),
                     self._embedding[2:, :]), 0)
        if self._vocab_size is None:
            self._vocab_size = self._embedding.get_shape().as_list()[0]

    with tf.variable_scope(self.variable_scope):
        if self._hparams.target_space_id is not None:
            space_embedding = tf.get_variable(
                'target_space_embedding', [32, embed_dim])
            self.target_symbol_embedding = tf.gather(
                space_embedding, self._hparams.target_space_id)
        else:
            self.target_symbol_embedding = None
    self.stack_output = None
def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))
        self.position_embedder = SinusoidsPositionEmbedder(
            self._hparams.position_embedder_hparams)

        self.multihead_attention_list = []
        self.poswise_networks = []
        for i in range(self._hparams.num_blocks):
            with tf.variable_scope("layer_{}".format(i)):
                with tf.variable_scope('self_attention'):
                    multihead_attention = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attention_list.append(
                        multihead_attention)

                # pylint: disable=protected-access
                if self._hparams.dim != \
                        multihead_attention._hparams.output_dim:
                    raise ValueError(
                        'The output dimension of '
                        'MultiheadEncoder should be equal '
                        'to the dim of TransformerEncoder')

                poswise_network = FeedForwardNetwork(
                    hparams=self._hparams['poswise_feedforward'])
                # pylint: disable=protected-access
                if self._hparams.dim != \
                        poswise_network._hparams.layers[-1]['kwargs']['units']:
                    raise ValueError(
                        'The output dimension of '
                        'FeedForwardNetwork should be equal '
                        'to the dim of TransformerEncoder')
                self.poswise_networks.append(poswise_network)
def default_hparams():
    """Returns a dictionary of hyperparameters with default values.

    Returns:
        dict: A dictionary with the following structure and values:

        .. code-block:: python

            {
                "encoder_major_type": "UnidirectionalRNNEncoder",
                "encoder_major_hparams": {},
                "encoder_minor_type": "UnidirectionalRNNEncoder",
                "encoder_minor_hparams": {},
                "config_share": False,
                "name": "hierarchical_encoder_wrapper"
            }

        Here:

        "encoder_major_type":
            The class name of the major encoder, which can be found in
            ~texar.modules.encoders or ~texar.custom.

        "encoder_major_hparams":
            The hparams for the major encoder's construction.

        "config_share":
            If set to `True`, :attr:`encoder_minor_type` and
            :attr:`encoder_minor_hparams` are replaced by the major
            encoder's corresponding hparams.

        "name":
            Name of the encoder.
    """
    hparams = {
        "name": "hierarchical_encoder",
        "encoder_major_type": "UnidirectionalRNNEncoder",
        "encoder_major_hparams": {},
        "encoder_minor_type": "UnidirectionalRNNEncoder",
        "encoder_minor_hparams": {},
        "config_share": False,
        "@no_typecheck": [
            'encoder_major_hparams',
            'encoder_minor_hparams'
        ]
    }
    hparams.update(EncoderBase.default_hparams())
    return hparams
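# Usage sketch (not part of the original source): constructing the
# hierarchical encoder with a partial override of the defaults above.
# Unspecified keys fall back to default_hparams(). The class name
# `HierarchicalRNNEncoder`, the import path, and the "rnn_cell"
# sub-structure are assumptions based on Texar's RNN-encoder conventions.

from texar.modules import HierarchicalRNNEncoder  # import path assumed

hparams = {
    "encoder_minor_type": "BidirectionalRNNEncoder",
    "encoder_minor_hparams": {
        "rnn_cell": {"kwargs": {"num_units": 256}}
    },
    "encoder_major_hparams": {
        "rnn_cell": {"kwargs": {"num_units": 512}}
    },
}
encoder = HierarchicalRNNEncoder(hparams=hparams)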
def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)
def default_hparams():
    """Returns a dictionary of hyperparameters with default values.

    .. role:: python(code)
       :language: python

    .. code-block:: python

        {
            "encoder_major_type": "UnidirectionalRNNEncoder",
            "encoder_major_hparams": {},
            "encoder_minor_type": "UnidirectionalRNNEncoder",
            "encoder_minor_hparams": {},
            "config_share": False,
            "name": "hierarchical_encoder_wrapper"
        }

    Here:

    "encoder_major_type" : str or class or instance
        The high-level encoder. Can be an RNN encoder class, its name or
        module path, or a class instance.
        Ignored if `encoder_major` is given to the encoder constructor.

    "encoder_major_hparams" : dict
        The hyperparameters for the high-level encoder. The high-level
        encoder is created with
        :python:`encoder_class(hparams=encoder_major_hparams)`.
        Ignored if `encoder_major` is given to the encoder constructor,
        or if "encoder_major_type" is an encoder instance.

    "encoder_minor_type" : str or class or instance
        The low-level encoder. Can be an RNN encoder class, its name or
        module path, or a class instance.
        Ignored if `encoder_minor` is given to the encoder constructor,
        or if "config_share" is True.

    "encoder_minor_hparams" : dict
        The hyperparameters for the low-level encoder. The low-level
        encoder is created with
        :python:`encoder_class(hparams=encoder_minor_hparams)`.
        Ignored if `encoder_minor` is given to the encoder constructor,
        or if "config_share" is True,
        or if "encoder_minor_type" is an encoder instance.

    "config_share":
        Whether to use encoder_major's hyperparameters
        to construct encoder_minor.

    "name":
        Name of the encoder.
    """
    hparams = {
        "name": "hierarchical_encoder",
        "encoder_major_type": "UnidirectionalRNNEncoder",
        "encoder_major_hparams": {},
        "encoder_minor_type": "UnidirectionalRNNEncoder",
        "encoder_minor_hparams": {},
        "config_share": False,
        "@no_typecheck": [
            'encoder_major_hparams',
            'encoder_minor_hparams'
        ]
    }
    hparams.update(EncoderBase.default_hparams())
    return hparams
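# Sketch of the "config_share" option documented above (not from the original
# source): when "config_share" is True, the low-level (minor) encoder is built
# from the major encoder's type and hparams, matching the
# `elif self._hparams.config_share` branch of the constructor earlier in this
# section. The class name `HierarchicalRNNEncoder` and the import path are
# assumptions.

from texar.modules import HierarchicalRNNEncoder  # import path assumed

shared_hparams = {
    "encoder_major_type": "UnidirectionalRNNEncoder",
    "encoder_major_hparams": {
        "rnn_cell": {"kwargs": {"num_units": 300}}
    },
    "config_share": True,  # encoder_minor_type/_hparams are ignored
}
encoder = HierarchicalRNNEncoder(hparams=shared_hparams)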
def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)
    self._input_size = self._hparams.dim
    self.self_attns = nn.ModuleList()
    if not self._hparams.use_bert_config:
        self.self_attn_layer_norm = nn.ModuleList()
    self.poswise_networks = nn.ModuleList()
    self.poswise_layer_norm = nn.ModuleList()
    self.output_layer_norm = nn.ModuleList()

    if self._hparams.use_bert_config:
        # In TensorFlow, eps for LayerNorm is 1e-12 by default.
        eps = 1e-12
    else:
        # In PyTorch, eps for LayerNorm is 1e-6 by default.
        eps = 1e-6

    for _ in range(self._hparams.num_blocks):
        mh_attn = MultiheadAttentionEncoder(
            self._input_size, self._hparams.multihead_attention)
        self.self_attns.append(mh_attn)
        if not self._hparams.use_bert_config:
            self.self_attn_layer_norm.append(
                nn.LayerNorm(self._input_size, eps=eps))
        if self._hparams.dim != mh_attn.hparams.output_dim:
            raise ValueError(
                'The "dim" in the hparams of '
                '"multihead_attention" should be equal to the '
                '"dim" of TransformerEncoder')

        pw_net = FeedForwardNetwork(
            hparams=self._hparams['poswise_feedforward'])
        final_dim = pw_net.hparams.layers[-1]['kwargs']['out_features']
        if self._hparams.dim != final_dim:
            raise ValueError(
                'The output dimension of '
                '"poswise_feedforward" should be equal '
                'to the "dim" of TransformerEncoder.')
        self.poswise_networks.append(pw_net)
        self.poswise_layer_norm.append(
            nn.LayerNorm(self._input_size, eps=eps))
        if self._hparams.use_bert_config:
            self.output_layer_norm.append(
                nn.LayerNorm(self._input_size, eps=eps))

    self.embed_dropout = nn.Dropout(p=self._hparams.embedding_dropout)
    self.residual_dropout = nn.Dropout(p=self._hparams.residual_dropout)

    if self._hparams.use_bert_config:
        self.input_normalizer = nn.LayerNorm(self._input_size, eps=eps)
    else:
        self.final_layer_normalizer = nn.LayerNorm(
            self._input_size, eps=eps)

    if self._hparams.initializer:
        initialize = layers.get_initializer(self._hparams.initializer)
        assert initialize is not None
        # Do not re-initialize LayerNorm modules.
        for name, param in self.named_parameters():
            if name.split('.')[-1] == 'weight' and 'layer_norm' not in name:
                initialize(param)
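# Illustrative hparams sketch for the PyTorch variant above (not from the
# original source): the constructor requires "dim" to match both
# multihead_attention's "output_dim" and the "out_features" of the last
# "poswise_feedforward" layer. The "Linear" layer type, the exact key names,
# and the import path below follow texar-pytorch conventions and are
# assumptions here; unspecified keys fall back to the class defaults.

from texar.torch.modules import TransformerEncoder  # import path assumed

pt_hparams = {
    "dim": 512,
    "num_blocks": 6,
    "use_bert_config": False,
    "multihead_attention": {"num_units": 512, "output_dim": 512},
    "poswise_feedforward": {
        "layers": [
            {"type": "Linear",
             "kwargs": {"in_features": 512, "out_features": 2048}},
            {"type": "Linear",
             # last "out_features" must equal "dim"
             "kwargs": {"in_features": 2048, "out_features": 512}},
        ]
    },
}
encoder = TransformerEncoder(hparams=pt_hparams)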