def __init__(self, embedding, hparams=None):
    ModuleBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        if self._hparams.position_embedder_type == 'sinusoids':
            self.position_embedder = SinusoidsPositionEmbedder(
                self._hparams.position_embedder_hparams)
        else:
            self.position_embedder = PositionEmbedder(
                position_size=self._hparams.position_size,
                hparams=self._hparams.position_embedder_hparams)

        self._embedding = embedding
        self._vocab_size = self._embedding.get_shape().as_list()[0]

        self.output_layer = \
            self._build_output_layer(shape_list(self._embedding)[-1])

        self.multihead_attentions = {'self_att': [], 'encdec_att': []}
        self.poswise_networks = []
        for i in range(self._hparams.num_blocks):
            layer_name = 'layer_{}'.format(i)
            with tf.variable_scope(layer_name):
                with tf.variable_scope("self_attention"):
                    multihead_attention = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attentions['self_att'].append(
                        multihead_attention)
                # pylint: disable=protected-access
                if self._hparams.dim != \
                        multihead_attention._hparams.output_dim:
                    raise ValueError('The output dimension of '
                                     'MultiheadAttentionEncoder should be '
                                     'equal to the dim of TransformerDecoder')

                with tf.variable_scope('encdec_attention'):
                    multihead_attention = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attentions['encdec_att'].append(
                        multihead_attention)
                if self._hparams.dim != \
                        multihead_attention._hparams.output_dim:
                    raise ValueError('The output dimension of '
                                     'MultiheadAttentionEncoder should be '
                                     'equal to the dim of TransformerDecoder')

                poswise_network = FeedForwardNetwork(
                    hparams=self._hparams['poswise_feedforward'])
                if self._hparams.dim != \
                        poswise_network._hparams.layers[-1]['kwargs']['units']:
                    raise ValueError('The output dimension of '
                                     'FeedForwardNetwork should be equal '
                                     'to the dim of TransformerDecoder')
                self.poswise_networks.append(poswise_network)
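# The three checks above all compare against self._hparams.dim, so a valid
# configuration keeps the attention output size and the width of the last
# position-wise feed-forward layer in sync with it. Below is a minimal,
# hypothetical hparams sketch that would pass those checks; only the keys the
# checks read are shown, 512 is an arbitrary example value, and the remaining
# keys are assumed to come from the module's defaults (not shown here).
example_dim = 512
example_decoder_hparams = {
    'dim': example_dim,
    'num_blocks': 6,
    'multihead_attention': {
        # must equal 'dim', otherwise __init__ raises a ValueError
        'output_dim': example_dim,
    },
    'poswise_feedforward': {
        'layers': [
            {'type': 'Dense',
             'kwargs': {'units': example_dim * 4, 'activation': 'relu'}},
            # the last layer's 'units' must equal 'dim'
            {'type': 'Dense', 'kwargs': {'units': example_dim}},
        ],
    },
}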
def test_sinusoids_position_embedder(self):
    """Tests :class:`texar.modules.SinusoidsPositionEmbedder`.
    """
    position_size = 64
    input_size = [100]
    hparams = {'dim': 513}  # use an odd dimension to check that padding is correct
    embedder = SinusoidsPositionEmbedder(position_size, hparams=hparams)
    inputs = torch.randint(position_size - 1, input_size)
    outputs = embedder(inputs)
    self.assertEqual(list(outputs.shape), input_size + [hparams['dim']])
def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))
        self.position_embedder = \
            SinusoidsPositionEmbedder(
                self._hparams.position_embedder_hparams)
def test_sinusoids_position_embedder(self):
    """Tests :class:`texar.modules.SinusoidsPositionEmbedder`.
    """
    position_size = 64
    input_size = (23, 18)
    hparams = {'dim': 513}  # use an odd dimension to check that padding is correct
    embedder = SinusoidsPositionEmbedder(position_size, hparams=hparams)
    inputs = torch.randint(position_size - 1, input_size)
    outputs = embedder(inputs)
    self.assertEqual(outputs.size(), input_size + (hparams['dim'],))

    embedder_no_cache = SinusoidsPositionEmbedder(
        None, hparams={**hparams, 'cache_embeddings': False})
    wide_inputs = torch.randint(
        -position_size, position_size * 2, input_size)
    wide_outputs = embedder_no_cache(wide_inputs)
    self.assertEqual(wide_outputs.size(), input_size + (hparams['dim'],))

    no_cache_outputs = embedder_no_cache(inputs)
    np.testing.assert_array_equal(outputs, no_cache_outputs)
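# The tests above only check output shapes and cache consistency. For
# reference, here is a minimal sketch of what the embedder is assumed to
# compute: the standard sinusoidal position encoding of Vaswani et al. (2017),
# with one half of the channels using sin and the other half cos, and an odd
# 'dim' (such as the 513 used above) leaving one channel to be zero-padded.
# This is an illustrative re-implementation, not the library's code.
import math

import torch


def sinusoid_positions(positions: torch.Tensor, dim: int) -> torch.Tensor:
    """Maps integer positions to sinusoidal embeddings of size `dim`.

    The result has shape ``positions.shape + (dim,)``.
    """
    num_timescales = dim // 2
    # Geometric progression of timescales from 1 to 1e4, as in Vaswani et al.
    log_increment = math.log(1e4) / max(num_timescales - 1, 1)
    inv_timescales = torch.exp(
        -log_increment * torch.arange(num_timescales, dtype=torch.float))
    scaled = positions.float().unsqueeze(-1) * inv_timescales
    signal = torch.cat([torch.sin(scaled), torch.cos(scaled)], dim=-1)
    if dim % 2 == 1:
        # Odd dimension: zero-pad the last channel, matching the odd-'dim'
        # case exercised by the tests above.
        signal = torch.nn.functional.pad(signal, (0, 1))
    return signal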
def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        if self._hparams.position_embedder_type == 'sinusoids':
            self.position_embedder = SinusoidsPositionEmbedder(
                self._hparams.position_embedder_hparams)
        else:
            self.position_embedder = PositionEmbedder(
                position_size=self._hparams.position_size,
                hparams=self._hparams.position_embedder_hparams)
        # pylint: disable=protected-access
        if self._hparams.dim != \
                self.position_embedder._hparams.dim:
            raise ValueError('"dim" in TransformerEncoder hparams must be '
                             'equal to "dim" in its '
                             'position_embedder_hparams.')

        self.multihead_attention_list = []
        self.poswise_networks = []
        for i in range(self._hparams.num_blocks):
            with tf.variable_scope("layer_{}".format(i)):
                with tf.variable_scope('attention'):
                    multihead_attention = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attention_list.append(
                        multihead_attention)
                # pylint: disable=protected-access
                if self._hparams.dim != \
                        multihead_attention._hparams.output_dim:
                    raise ValueError('The "output_dim" in the hparams of '
                                     'multihead_attention should be equal '
                                     'to the "dim" of TransformerEncoder')

                poswise_network = FeedForwardNetwork(
                    hparams=self._hparams['poswise_feedforward'])
                # pylint: disable=protected-access
                if self._hparams.dim != \
                        poswise_network._hparams.layers[-1]['kwargs']['units']:
                    raise ValueError('The "units" in the "kwargs" of the last '
                                     'layer of FeedForwardNetwork should be '
                                     'equal to the "dim" of TransformerEncoder')
                self.poswise_networks.append(poswise_network)
def __init__(self, embedding, hparams=None):
    ModuleBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))
        self.position_embedder = \
            SinusoidsPositionEmbedder(
                self._hparams.position_embedder_hparams)
        self._embedding = embedding
        self._vocab_size = self._embedding.get_shape().as_list()[0]

        self.output_layer = \
            self._build_output_layer(shape_list(self._embedding)[-1])
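# A hypothetical usage sketch for the constructor above. The class name
# `TransformerDecoder` is taken from the error messages in the first snippet;
# the import path, `vocab_size`, `dim`, and the partial hparams dict are
# illustrative assumptions, with the remaining hyperparameters expected to
# come from the module's defaults.
import tensorflow as tf
from texar.modules import TransformerDecoder  # assumed import path

vocab_size, dim = 10000, 512
# The decoder receives the word-embedding variable directly and infers the
# vocabulary size and output-layer width from its shape.
embedding = tf.get_variable('word_embedding', shape=[vocab_size, dim])
decoder = TransformerDecoder(embedding=embedding,
                             hparams={'num_blocks': 6, 'dim': dim})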
def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))
        self.position_embedder = \
            SinusoidsPositionEmbedder(
                self._hparams.position_embedder_hparams)

        self.multihead_attention_list = []
        self.poswise_networks = []
        for i in range(self._hparams.num_blocks):
            with tf.variable_scope("layer_{}".format(i)):
                with tf.variable_scope('self_attention'):
                    multihead_attention = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attention_list.append(
                        multihead_attention)
                # pylint: disable=protected-access
                if self._hparams.dim != \
                        multihead_attention._hparams.output_dim:
                    raise ValueError('The output dimension of '
                                     'MultiheadAttentionEncoder should be '
                                     'equal to the dim of TransformerEncoder')

                poswise_network = FeedForwardNetwork(
                    hparams=self._hparams['poswise_feedforward'])
                # pylint: disable=protected-access
                if self._hparams.dim != \
                        poswise_network._hparams.layers[-1]['kwargs']['units']:
                    raise ValueError('The output dimension of '
                                     'FeedForwardNetwork should be equal '
                                     'to the dim of TransformerEncoder')
                self.poswise_networks.append(poswise_network)