def __init__(self, config, batch_size, dropout_source, dropout_embedding,
             dropout_hidden):
    """Create the source embedding layer and the two directional GRU stacks.

    Args:
        config: Settings object. Reads source_vocab_sizes, dim_per_factor,
            theano_compat, embedding_size, state_size,
            rnn_layer_normalization, rnn_enc_depth and
            rnn_enc_transition_depth.
        batch_size: Forwarded unchanged to both GRU stacks.
        dropout_source: Stored on the instance; not used further here.
        dropout_embedding: Forwarded to both stacks as dropout_input.
        dropout_hidden: Forwarded to both stacks as dropout_state.
    """
    self.dropout_source = dropout_source

    with tf.variable_scope("embedding"):
        self.emb_layer = layers.EmbeddingLayer(config.source_vocab_sizes,
                                               config.dim_per_factor)

    # The same legacy bias setting is used for both directions.
    bias_type = (layers.LegacyBiasType.THEANO_A
                 if config.theano_compat
                 else layers.LegacyBiasType.NEMATUS_COMPAT_FALSE)

    # Keyword arguments common to the forward and backward stacks; the
    # backward stack only adds reverse_alternation=True on top of these.
    shared_stack_args = dict(
        input_size=config.embedding_size,
        state_size=config.state_size,
        batch_size=batch_size,
        use_layer_norm=config.rnn_layer_normalization,
        legacy_bias_type=bias_type,
        dropout_input=dropout_embedding,
        dropout_state=dropout_hidden,
        stack_depth=config.rnn_enc_depth,
        transition_depth=config.rnn_enc_transition_depth,
        alternating=True,
        residual_connections=True,
        first_residual_output=1)

    with tf.variable_scope("forward-stack"):
        self.forward_encoder = layers.GRUStack(**shared_stack_args)

    with tf.variable_scope("backward-stack"):
        self.backward_encoder = layers.GRUStack(reverse_alternation=True,
                                                **shared_stack_args)
def __init__(self, config, batch_size, dropout_source, dropout_embedding,
             dropout_hidden):
    """Create the source embedding layer and the two directional GRU stacks.

    Args:
        config: Settings object. Reads source_vocab_sizes, dim_per_factor,
            embedding_size, state_size, use_layer_norm, enc_depth and
            enc_recurrence_transition_depth.
        batch_size: Forwarded unchanged to both GRU stacks.
        dropout_source: Stored on the instance; not used further here.
        dropout_embedding: Forwarded to both stacks as dropout_input.
        dropout_hidden: Forwarded to both stacks as dropout_state.
    """
    self.dropout_source = dropout_source

    with tf.variable_scope("embedding"):
        self.emb_layer = layers.EmbeddingLayer(config.source_vocab_sizes,
                                               config.dim_per_factor)

    # Keyword arguments common to the forward and backward stacks; the
    # backward stack only adds reverse_alternation=True on top of these.
    shared_stack_args = dict(
        input_size=config.embedding_size,
        state_size=config.state_size,
        batch_size=batch_size,
        use_layer_norm=config.use_layer_norm,
        nematus_compat=False,
        dropout_input=dropout_embedding,
        dropout_state=dropout_hidden,
        stack_depth=config.enc_depth,
        transition_depth=config.enc_recurrence_transition_depth,
        alternating=True,
        residual_connections=True,
        first_residual_output=1)

    with tf.variable_scope("forward-stack"):
        self.forward_encoder = layers.GRUStack(**shared_stack_args)

    with tf.variable_scope("backward-stack"):
        self.backward_encoder = layers.GRUStack(reverse_alternation=True,
                                                **shared_stack_args)
def __init__(self, config, context, x_embs, x_mask, dropout_target,
             dropout_embedding, dropout_hidden, encoder_embedding_layer=None):
    """Build the decoder's layers and its initial state.

    Constructs, in order: the initial-state feed-forward layer, the target
    embedding layer (or reuses the encoder's), the base recurrent step
    (GRU step, attention step, deep-transition GRU step), the optional
    higher GRU stack, the optional lexical layer, and the next-word
    predictor.

    Args:
        config: Settings object. Reads state_size, rnn_layer_normalization,
            translation_maxlen, target_embedding_size, target_vocab_size,
            theano_compat, rnn_dec_base_transition_depth, rnn_dec_depth,
            rnn_dec_high_transition_depth, rnn_dec_deep_context,
            rnn_lexical_model, embedding_size and tie_decoder_embeddings.
        context: Tensor that is mask-averaged over axis 0 to seed the
            initial state and is handed to the attention step.
        x_embs: Stored on the instance; not used further here.
        x_mask: Mask multiplied into context; its second dimension is used
            as the batch size.
        dropout_target: Stored on the instance; not used further here.
        dropout_embedding: Dropout argument forwarded to the first GRU
            step, the lexical layer and the predictor.
        dropout_hidden: Dropout argument forwarded to the remaining layers.
        encoder_embedding_layer: Optional embedding layer to reuse as the
            target embedding layer. When None, a new EmbeddingLayer is
            created.
    """
    self.dropout_target = dropout_target

    batch_size = tf.shape(x_mask)[1]

    with tf.variable_scope("initial_state_constructor"):
        # Masked mean of the context over axis 0: masked positions
        # contribute nothing to the sum, and the divisor is the per-column
        # mask total.
        context_sum = tf.reduce_sum(
            context * tf.expand_dims(x_mask, axis=2),
            axis=0)
        context_mean = context_sum / tf.expand_dims(
            tf.reduce_sum(x_mask, axis=0),
            axis=1)
        self.init_state_layer = layers.FeedForwardLayer(
            in_size=config.state_size * 2,
            out_size=config.state_size,
            batch_size=batch_size,
            use_layer_norm=config.rnn_layer_normalization,
            dropout_input=dropout_hidden)
        self.init_state = self.init_state_layer.forward(context_mean)

    self.x_embs = x_embs

    self.translation_maxlen = config.translation_maxlen
    self.embedding_size = config.target_embedding_size
    self.state_size = config.state_size
    self.target_vocab_size = config.target_vocab_size

    with tf.variable_scope("embedding"):
        # Identity test against the None singleton: an equality test would
        # dispatch to the layer object's __eq__ if it ever defines one.
        if encoder_embedding_layer is None:
            self.y_emb_layer = layers.EmbeddingLayer(
                vocabulary_sizes=[config.target_vocab_size],
                dim_per_factor=[config.target_embedding_size])
        else:
            self.y_emb_layer = encoder_embedding_layer

    # NOTE(review): scope nesting below was reconstructed from flattened
    # source; confirm that grustep2 is created directly under "base".
    with tf.variable_scope("base"):
        with tf.variable_scope("gru0"):
            if config.theano_compat:
                bias_type = layers.LegacyBiasType.THEANO_A
            else:
                bias_type = layers.LegacyBiasType.NEMATUS_COMPAT_FALSE
            self.grustep1 = layers.GRUStep(
                input_size=config.target_embedding_size,
                state_size=config.state_size,
                batch_size=batch_size,
                use_layer_norm=config.rnn_layer_normalization,
                legacy_bias_type=bias_type,
                dropout_input=dropout_embedding,
                dropout_state=dropout_hidden)
        with tf.variable_scope("attention"):
            self.attstep = layers.AttentionStep(
                context=context,
                context_state_size=2 * config.state_size,
                context_mask=x_mask,
                state_size=config.state_size,
                hidden_size=2 * config.state_size,
                use_layer_norm=config.rnn_layer_normalization,
                dropout_context=dropout_hidden,
                dropout_state=dropout_hidden)
        if config.theano_compat:
            bias_type = layers.LegacyBiasType.THEANO_B
        else:
            bias_type = layers.LegacyBiasType.NEMATUS_COMPAT_TRUE
        # grustep1 already accounts for one transition of the base level,
        # hence depth - 1 here; the scope names continue at "gru1".
        self.grustep2 = layers.DeepTransitionGRUStep(
            input_size=2 * config.state_size,
            state_size=config.state_size,
            batch_size=batch_size,
            use_layer_norm=config.rnn_layer_normalization,
            legacy_bias_type=bias_type,
            dropout_input=dropout_hidden,
            dropout_state=dropout_hidden,
            transition_depth=config.rnn_dec_base_transition_depth - 1,
            var_scope_fn=lambda i: "gru{0}".format(i + 1))

    with tf.variable_scope("high"):
        if config.rnn_dec_depth == 1:
            # A depth of 1 means there are no levels above the base.
            self.high_gru_stack = None
        else:
            if config.theano_compat:
                bias_type = layers.LegacyBiasType.THEANO_A
            else:
                bias_type = layers.LegacyBiasType.NEMATUS_COMPAT_TRUE
            self.high_gru_stack = layers.GRUStack(
                input_size=config.state_size,
                state_size=config.state_size,
                batch_size=batch_size,
                use_layer_norm=config.rnn_layer_normalization,
                legacy_bias_type=bias_type,
                dropout_input=dropout_hidden,
                dropout_state=dropout_hidden,
                stack_depth=config.rnn_dec_depth - 1,
                transition_depth=config.rnn_dec_high_transition_depth,
                context_state_size=(2 * config.state_size
                                    if config.rnn_dec_deep_context
                                    else 0),
                residual_connections=True,
                first_residual_output=0)

    if config.rnn_lexical_model:
        with tf.variable_scope("lexical"):
            self.lexical_layer = layers.LexicalModel(
                in_size=config.embedding_size,
                out_size=config.embedding_size,
                batch_size=batch_size,
                use_layer_norm=config.rnn_layer_normalization,
                dropout_embedding=dropout_embedding,
                dropout_hidden=dropout_hidden)
    else:
        self.lexical_layer = None

    with tf.variable_scope("next_word_predictor"):
        W = None
        if config.tie_decoder_embeddings:
            # Hand the predictor the transposed target embedding matrix as
            # its hidden-to-logits weights.
            W = self.y_emb_layer.get_embeddings(factor=0)
            W = tf.transpose(W)
        self.predictor = Predictor(config, batch_size, dropout_embedding,
                                   dropout_hidden, hidden_to_logits_W=W)