Example #1
    def _preprocess(self, features):
        """Preprocesses features for multilingual translation."""
        inputs = features["inputs"]
        targets = features["targets"]
        target_tags = features["target_tags"]

        # Expand target tags to beam width, if necessary.
        if self._hparams.mode == tf_estimator.ModeKeys.PREDICT:
            # <float32> [batch_size * beam_width, 1, 1, emb_size].
            beam_width = self._hparams.beam_width
            target_tags = tf.tile(target_tags, [beam_width, 1, 1, 1])

        # Add target tags to the input sequences.
        # <float32> [batch_size, seq_len + 1, 1, emb_size].
        inputs = tf.concat([target_tags, inputs], axis=1)

        # Compute length of the input sequences.
        inputs_length = common_layers.length_from_embedding(inputs)
        inputs = common_layers.flatten4d3d(inputs)

        # Preprocess targets.
        targets = common_layers.shift_right(targets)
        # Add 1 to account for the padding added to the left from shift_right.
        targets_length = common_layers.length_from_embedding(targets) + 1
        targets = common_layers.flatten4d3d(targets)

        return inputs, inputs_length, targets, targets_length
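Every snippet on this page leans on common_layers.length_from_embedding to recover per-example sequence lengths from dense <float32> [batch_size, seq_len, 1, emb_size] embeddings. As a rough mental model, here is a minimal sketch of that helper, assuming padded positions are all-zero embedding vectors (the actual tensor2tensor implementation may differ in detail):

import tensorflow.compat.v1 as tf

def length_from_embedding_sketch(emb):
    """Counts non-padding steps in a <float32> [batch, seq_len, 1, emb_size] tensor.

    A step is treated as padding when its embedding vector is all zeros, so the
    length of each sequence is the number of steps whose absolute sum over the
    embedding dimension is nonzero.
    """
    nonzero = tf.cast(
        tf.greater(tf.reduce_sum(tf.abs(emb), axis=-1), 0.0), tf.float32)
    return tf.cast(tf.reduce_sum(nonzero, axis=[1, 2]), tf.int32)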
Example #2
def lstm_seq2seq_internal(inputs, targets, hparams, train):
    """The basic LSTM seq2seq model, main step used for training."""
    with tf.variable_scope("lstm_seq2seq"):
        if inputs is not None:
            inputs_length = common_layers.length_from_embedding(inputs)
            # Flatten inputs.
            inputs = common_layers.flatten4d3d(inputs)

            # LSTM encoder.
            inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
            _, final_encoder_state = lstm(inputs, inputs_length, hparams,
                                          train, "encoder")
        else:
            final_encoder_state = None

        # LSTM decoder.
        shifted_targets = common_layers.shift_right(targets)
        # Add 1 to account for the padding added to the left from shift_right
        targets_length = common_layers.length_from_embedding(
            shifted_targets) + 1
        decoder_outputs, _ = lstm(common_layers.flatten4d3d(shifted_targets),
                                  targets_length,
                                  hparams,
                                  train,
                                  "decoder",
                                  initial_state=final_encoder_state)
        return tf.expand_dims(decoder_outputs, axis=2)
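The shift_right / length_from_embedding(...) + 1 pairing recurs in nearly every decoder on this page. A toy illustration of why the + 1 is needed, assuming shift_right prepends an all-zero step and drops the last one, as in tensor2tensor's common_layers (hypothetical values, not taken from any of the repositories above):

import numpy as np
import tensorflow.compat.v1 as tf
from tensor2tensor.layers import common_layers

tf.disable_v2_behavior()

# Toy batch: 1 example, 3 steps, emb_size 2; the last step is already padding.
targets = tf.constant(
    np.array([[[[1., 1.]], [[2., 2.]], [[0., 0.]]]], dtype=np.float32))

shifted_targets = common_layers.shift_right(targets)
# shifted_targets is [[0, 0], [1, 1], [2, 2]]: the prepended all-zero "go" step
# looks like padding, so length_from_embedding still reports 2.
targets_length = common_layers.length_from_embedding(shifted_targets) + 1

with tf.Session() as sess:
    print(sess.run(targets_length))  # [3]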
Example #3
def lstm_seq2seq_internal_bid_encoder(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model with bidirectional encoder."""
  with tf.variable_scope("lstm_seq2seq_bid_encoder"):
    if inputs is not None:
      inputs_length = common_layers.length_from_embedding(inputs)
      # Flatten inputs.
      inputs = common_layers.flatten4d3d(inputs)
      # LSTM encoder.
      _, final_encoder_state = lstm_bid_encoder(
          inputs, inputs_length, hparams, train, "encoder")
    else:
      inputs_length = None
      final_encoder_state = None
    # LSTM decoder.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    hparams_decoder = copy.copy(hparams)
    hparams_decoder.hidden_size = 2 * hparams.hidden_size
    decoder_outputs, _ = lstm(
        common_layers.flatten4d3d(shifted_targets),
        targets_length,
        hparams_decoder,
        train,
        "decoder",
        initial_state=final_encoder_state)
    return tf.expand_dims(decoder_outputs, axis=2)
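flatten4d3d appears just as often: it merges the dummy third axis into the time axis so the [batch_size, seq_len, 1, emb_size] features can be fed to dynamic_rnn. A minimal sketch of the reshape it performs (the real helper lives in tensor2tensor's common_layers):

import tensorflow.compat.v1 as tf
from tensor2tensor.layers import common_layers

def flatten4d3d_sketch(x):
    """Reshapes <float32> [batch, a, b, emb_size] into [batch, a * b, emb_size]."""
    shape = common_layers.shape_list(x)
    return tf.reshape(x, [shape[0], shape[1] * shape[2], shape[3]])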
Example #4
def lstm_seq2seq_internal_attention(inputs, targets, hparams, train):
    """LSTM seq2seq model with attention, main step used for training."""
    with tf.variable_scope("lstm_seq2seq_attention"):
        # This is a temporary fix for varying-length sequences within a batch.
        # A more complete fix should pass a length tensor from outside so that
        # all the lstm variants can use it.
        inputs_length = common_layers.length_from_embedding(inputs)
        # Flatten inputs.
        inputs = common_layers.flatten4d3d(inputs)
        # LSTM encoder.
        encoder_outputs, final_encoder_state = lstm(
            inputs, hparams, train, "encoder", sequence_length=inputs_length)
        # LSTM decoder with attention
        shifted_targets = common_layers.shift_right(targets)
        # Add 1 to account for the padding added to the left from shift_right
        targets_length = common_layers.length_from_embedding(
            shifted_targets) + 1
        decoder_outputs, _ = lstm_attention_decoder(
            common_layers.flatten4d3d(shifted_targets),
            hparams,
            train,
            "decoder",
            final_encoder_state,
            encoder_outputs,
            encoder_output_length=inputs_length,
            decoder_input_length=targets_length)
        return tf.expand_dims(decoder_outputs, axis=2)
Example #5
    def _build_inputs_and_targets(self,
                                  from_seqs=None,
                                  from_tags=None,
                                  to_seqs=None,
                                  to_tags=None):
        """Given from and to sequences and tags, construct inputs and targets."""
        del from_tags  # Unused.
        if from_seqs is not None:
            inputs = from_seqs
            inputs_length = common_layers.length_from_embedding(inputs)
            if to_tags is not None:
                # Add to-tags to the inputs and adjust lengths.
                # <float32> [batch_size, seq_len + 1, 1, emb_size].
                inputs = tf.concat([to_tags, inputs], axis=1)
                inputs_length = inputs_length + 1
            inputs = common_layers.flatten4d3d(inputs)
        else:
            inputs = None
            inputs_length = None

        if to_seqs is not None:
            # Shift to-sequences to form targets.
            # <float32> [batch_size, seq_len, 1, emb_size].
            targets = common_layers.shift_right(to_seqs)
            # Add 1 to account for the padding added to the left from shift_right.
            targets_length = common_layers.length_from_embedding(targets) + 1
            targets = common_layers.flatten4d3d(targets)
        else:
            targets = None
            targets_length = None

        return (inputs, inputs_length), (targets, targets_length)
Example #6
def lstm_seq2seq_internal_bid_encoder(inputs, targets, hparams, train):
    """The basic LSTM seq2seq model with bidirectional encoder."""
    with tf.variable_scope("lstm_seq2seq_bid_encoder"):
        if inputs is not None:
            inputs_length = common_layers.length_from_embedding(inputs)
            # Flatten inputs.
            inputs = common_layers.flatten4d3d(inputs)
            # LSTM encoder.
            _, final_encoder_state = lstm_bid_encoder(inputs, inputs_length,
                                                      hparams, train,
                                                      "encoder")
        else:
            inputs_length = None
            final_encoder_state = None
        # LSTM decoder.
        shifted_targets = common_layers.shift_right(targets)
        # Add 1 to account for the padding added to the left from shift_right
        targets_length = common_layers.length_from_embedding(
            shifted_targets) + 1
        hparams_decoder = copy.copy(hparams)
        hparams_decoder.hidden_size = 2 * hparams.hidden_size
        decoder_outputs, _ = lstm(common_layers.flatten4d3d(shifted_targets),
                                  targets_length,
                                  hparams_decoder,
                                  train,
                                  "decoder",
                                  initial_state=final_encoder_state)
        return tf.expand_dims(decoder_outputs, axis=2)
Example #7
def lstm_seq2seq_internal(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model, main step used for training."""
  with tf.variable_scope("lstm_seq2seq"):
    if inputs is not None:
      inputs_length = common_layers.length_from_embedding(inputs)
      # Flatten inputs.
      inputs = common_layers.flatten4d3d(inputs)

      # LSTM encoder.
      inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
      _, final_encoder_state = lstm(inputs, inputs_length, hparams, train,
                                    "encoder")
    else:
      final_encoder_state = None

    # LSTM decoder.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    decoder_outputs, _ = lstm(
        common_layers.flatten4d3d(shifted_targets),
        targets_length,
        hparams,
        train,
        "decoder",
        initial_state=final_encoder_state)
    return tf.expand_dims(decoder_outputs, axis=2)
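Most snippets call a small lstm(...) helper rather than tf.nn.dynamic_rnn directly. A minimal sketch of what that helper plausibly looks like, reconstructed from the call sites above (stacked LSTM cells with input dropout during training; the exact tensor2tensor version may differ):

import tensorflow.compat.v1 as tf

def lstm_sketch(inputs, sequence_length, hparams, train, name, initial_state=None):
    """Runs a stacked LSTM over [batch, seq_len, emb_size] inputs.

    Returns (outputs, final_state), exactly as tf.nn.dynamic_rnn does.
    """
    layers = [
        tf.nn.rnn_cell.DropoutWrapper(
            tf.nn.rnn_cell.LSTMCell(hparams.hidden_size),
            input_keep_prob=1.0 - hparams.dropout * float(train))
        for _ in range(hparams.num_hidden_layers)
    ]
    with tf.variable_scope(name):
        return tf.nn.dynamic_rnn(
            tf.nn.rnn_cell.MultiRNNCell(layers),
            inputs,
            sequence_length,
            initial_state=initial_state,
            dtype=tf.float32,
            time_major=False)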
Example #8
def nested_list_operations(input_seq, action_seq, hp, name=""):
    """Runs a NestedListOperationCell over the concatenated action/input sequence.

    Returns the final cell states reshaped into a
    [batch, num_lists, list_size, hidden_size] grid.
    """
    batch_size, _, _, hidden_size = common_layers.shape_list(input_seq)
    # hidden_size = hp.hidden_size
    # if hp.concat_context:
    #     hidden_size *= 2
    cell = NestedListOperationCell(hidden_size,
                                   list_size=hp.list_size,
                                   num_lists=hp.num_lists)

    sequence_length = common_layers.length_from_embedding(input_seq)
    sequence_length = tf.identity(sequence_length, "sequence_length")
    # hidden_size = common_layers.shape_list(input_seq)[-1]

    cell_input = tf.concat([action_seq, input_seq], axis=-1)
    # batch_size = common_layers.shape_list(cell_input)[0]
    cell_input = tf.squeeze(cell_input, axis=2)
    cell_input = tf.identity(cell_input, "cell_input")
    initial_state = tf.zeros(shape=[batch_size, cell.state_size],
                             dtype=tf.float32)
    with tf.variable_scope(name):
        history, final_states = tf.nn.dynamic_rnn(cell,
                                                  cell_input,
                                                  sequence_length,
                                                  initial_state=initial_state,
                                                  dtype=tf.float32,
                                                  time_major=False)
    grid_structured_states = tf.reshape(
        final_states, [-1, cell.num_lists, cell.list_size, hidden_size])
    grid_structured_states = tf.identity(grid_structured_states,
                                         "grid_structured_states")
    return grid_structured_states
Example #9
def bid_gru_encode(input_seq, hparams, target_space, features, name, sequence_length=None):
    if sequence_length is None:
        sequence_length = common_layers.length_from_embedding(input_seq)
    input_seq = common_layers.flatten4d3d(input_seq)
    with tf.variable_scope(name):
        cell_fw = [tf.nn.rnn_cell.GRUCell(hparams.hidden_size) for _ in range(hparams.num_hidden_layers)]
        cell_bw = [tf.nn.rnn_cell.GRUCell(hparams.hidden_size) for _ in range(hparams.num_hidden_layers)]

        ((encoder_fw_outputs, encoder_bw_outputs),
         (encoder_fw_state, encoder_bw_state)) = tf.nn.bidirectional_dynamic_rnn(
            tf.nn.rnn_cell.MultiRNNCell(cell_fw),
            tf.nn.rnn_cell.MultiRNNCell(cell_bw),
            input_seq,
            sequence_length,
            initial_state_fw=None,
            initial_state_bw=None,
            dtype=tf.float32,
            time_major=False)

        encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)
        encoder_outputs = tf.expand_dims(encoder_outputs, axis=-2)

        final_output = tf.concat((encoder_fw_state[-1], encoder_bw_state[-1]),-1)
        final_output = tf.expand_dims(final_output, axis=-2)
        final_output = tf.expand_dims(final_output, axis=-2)
    return encoder_outputs, final_output
Example #10
    def create_model_encode_decode(
            self, inputs, y_id):  # inp[batch step 1 hid]  yid[batch step 1 1]
        hparams = self.hparams
        train_flag = self.train_flag
        vocab_size = self.vocabsz
        embeddings_y = self.embeddings_y
        with tf.variable_scope("foo", reuse=tf.AUTO_REUSE):
            ### y embed

            y = tf.nn.embedding_lookup(embeddings_y, y_id)
            y = tf.squeeze(y, axis=3)  # [? ? 1 hid]

            if len(inputs.shape) == 2:  # [batch hid]
                inputs = tf.expand_dims(tf.expand_dims(inputs, axis=1), axis=1)
            inputs_length = common_layers.length_from_embedding(
                inputs)  # [batch step 1 hid]
            #  Flatten inputs.
            inputs = common_layers.flatten4d3d(inputs)

            # LSTM encoder.
            inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
            _, final_encoder_state = lstm_yr(
                inputs, inputs_length, hparams, train_flag,
                "encoder")  # final_encoder_state must be an LSTMStateTuple

            ##
            # LSTM decoder.
            shifted_targets = common_layers.shift_right(
                y)  # [46,23,78]->[0,46,23] | [batch step 1 hid]
            # Add 1 to account for the padding added to the left from shift_right
            targets_length = common_layers.length_from_embedding(
                shifted_targets) + 1

            decoder_outputs, _ = lstm_yr(
                common_layers.flatten4d3d(shifted_targets),
                targets_length,
                hparams,
                train_flag,
                "decoder",
                initial_state=final_encoder_state)

            # decode output [batch step hid]
            decoder_outputs = tf.layers.dense(inputs=decoder_outputs,
                                              units=vocab_size)
            # ->[batch step vocabsz]
            decoder_outputs = self.tensor3dto4d(decoder_outputs)
            return decoder_outputs
Example #11
def target_reversing_and_padding(target_seq, list_size):
    """Reverses each target sequence in time and pads/crops it to list_size steps."""
    targets_length = common_layers.length_from_embedding(target_seq)
    flipped_target_seq = tf.reverse_sequence(target_seq,
                                             targets_length,
                                             seq_axis=1)
    flipped_target_seq = tf.pad(flipped_target_seq,
                                [[0, 0], [0, list_size], [0, 0], [0, 0]])
    flipped_target_seq = flipped_target_seq[:, :list_size, :, :]
    flipped_target_seq = tf.identity(flipped_target_seq, "flipped_target_seq")
    return flipped_target_seq
Example #12
    def body(self, features):
        inputs = features["inputs"]
        train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN

        encoder_outputs, final_encoder_state, encoder_decoder_attention_bias, inputs_length = \
            self.encode(inputs, self._hparams)

        if "targets_actions" in features:
            targets = features["targets_actions"]
        else:
            tf.logging.warn(
                "CopySeq2Seq must be used with a SemanticParsing problem with a ShiftReduceGrammar; bad things will happen otherwise"
            )
            targets = features["targets"]

        # LSTM decoder with attention
        shifted_targets = common_layers.shift_right(targets)

        # Add 1 to account for the padding added to the left from shift_right
        targets_length = common_layers.length_from_embedding(
            shifted_targets) + 1
        shifted_targets = common_layers.flatten4d3d(shifted_targets)

        hparams_decoder = copy.copy(self._hparams)
        hparams_decoder.hidden_size = 2 * self._hparams.hidden_size

        decoder_output = lstm_attention_decoder(shifted_targets,
                                                hparams_decoder, train,
                                                "decoder", final_encoder_state,
                                                encoder_outputs, inputs_length,
                                                targets_length)
        decoder_output = tf.expand_dims(decoder_output, axis=2)

        body_output = dict()
        target_modality = self._problem_hparams.target_modality \
            if self._problem_hparams else {"targets": None}

        assert self._hparams.pointer_layer in ("attentive",
                                               "decaying_attentive")

        for key, modality in target_modality.items():
            if isinstance(modality, CopyModality):
                with tf.variable_scope("copy_layer/" + key):
                    if self._hparams.pointer_layer == "decaying_attentive":
                        output_layer = DecayingAttentivePointerLayer(
                            encoder_outputs)
                    else:
                        output_layer = AttentivePointerLayer(encoder_outputs)
                    scores = output_layer(decoder_output)
                    scores += encoder_decoder_attention_bias
                    body_output[key] = scores
            else:
                body_output[key] = decoder_output

        return body_output
Example #13
def lstm_seq2seq_internal_attention_bid_encoder(inputs, targets, hparams,
                                                train):
  """LSTM seq2seq model with attention, main step used for training."""
  with tf.variable_scope("lstm_seq2seq_attention_bid_encoder"):
    inputs_length = common_layers.length_from_embedding(inputs)
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)
    # LSTM encoder.
    encoder_outputs, final_encoder_state = lstm_bid_encoder(
        inputs, inputs_length, hparams, train, "encoder")
    # LSTM decoder with attention
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    hparams_decoder = copy.copy(hparams)
    hparams_decoder.hidden_size = 2 * hparams.hidden_size
    decoder_outputs = lstm_attention_decoder(
        common_layers.flatten4d3d(shifted_targets), hparams_decoder, train,
        "decoder", final_encoder_state, encoder_outputs,
        inputs_length, targets_length)
    return tf.expand_dims(decoder_outputs, axis=2)
Example #14
 def body(self, features):
   if self._hparams.initializer == "orthogonal":
     raise ValueError("LSTM models fail with orthogonal initializer.")
   train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN
   inputs = features.get("inputs")
   inputs_length = common_layers.length_from_embedding(inputs)
   # Flatten inputs.
   inputs = common_layers.flatten4d3d(inputs)
   # LSTM encoder.
   inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
   encoder_output, _ = lstm(inputs, inputs_length, self._hparams, train,
                            "encoder")
   return tf.expand_dims(encoder_output, axis=2)
Example #15
 def body(self, features):
     if self._hparams.initializer == "orthogonal":
         raise ValueError("LSTM models fail with orthogonal initializer.")
     train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN
     inputs = features.get("inputs")
     inputs_length = common_layers.length_from_embedding(inputs)
     # Flatten inputs.
     inputs = common_layers.flatten4d3d(inputs)
     # LSTM encoder.
     inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
     encoder_output, _ = lstm(inputs, inputs_length, self._hparams, train,
                              "encoder")
     return tf.expand_dims(encoder_output, axis=2)
Example #16
  def _build_lm_inputs(self, features):
    """Builds inputs and targets for LM training."""
    targets = features["targets"]
    target_tags = features["target_tags"]

    if self._hparams.mode == tf.estimator.ModeKeys.PREDICT:
      target_tags = tf.tile(target_tags, [self._hparams.beam_width, 1, 1, 1])

    # Construct LM inputs.
    inputs = common_layers.shift_right(targets, pad_value=target_tags)
    inputs_length = common_layers.length_from_embedding(targets) + 1
    inputs = common_layers.flatten4d3d(inputs)

    return inputs, inputs_length
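Here shift_right receives a pad_value, so the target-language tag (already tiled to the beam width at predict time) takes the place of the usual all-zero "go" step. A toy illustration, assuming shift_right(x, pad_value=...) concatenates pad_value at the front of the time axis and drops the last step (hypothetical values):

import numpy as np
import tensorflow.compat.v1 as tf
from tensor2tensor.layers import common_layers

target_tags = tf.constant(
    np.array([[[[9., 9.]]]], dtype=np.float32))  # [1, 1, 1, 2]
targets = tf.constant(
    np.array([[[[1., 1.]], [[2., 2.]], [[0., 0.]]]], dtype=np.float32))  # [1, 3, 1, 2]

inputs = common_layers.shift_right(targets, pad_value=target_tags)
# inputs is [[9, 9], [1, 1], [2, 2]]: the tag embedding is now the first LM input.
# The length comes from the unshifted targets, plus one for the tag step.
inputs_length = common_layers.length_from_embedding(targets) + 1  # [3]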
Example #17
def lstm_seq2seq_internal_attention(inputs, targets, hparams, train):
  """LSTM seq2seq model with attention, main step used for training."""
  with tf.variable_scope("lstm_seq2seq_attention"):
    # This is a temporary fix for varying-length sequences within a batch.
    # A more complete fix should pass a length tensor from outside so that
    # all the lstm variants can use it.
    inputs_length = common_layers.length_from_embedding(inputs)
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)

    # LSTM encoder.
    inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
    encoder_outputs, final_encoder_state = lstm(
        inputs, inputs_length, hparams, train, "encoder")

    # LSTM decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    decoder_outputs = lstm_attention_decoder(
        common_layers.flatten4d3d(shifted_targets), hparams, train, "decoder",
        final_encoder_state, encoder_outputs, inputs_length, targets_length)
    return tf.expand_dims(decoder_outputs, axis=2)
Example #18
    def encode(self, inputs, hparams, features=None):
        train = hparams.mode == tf.estimator.ModeKeys.TRAIN
        inputs_length = common_layers.length_from_embedding(inputs)

        # Flatten inputs.
        inputs = common_layers.flatten4d3d(inputs)

        encoder_padding = common_attention.embedding_to_padding(inputs)
        encoder_decoder_attention_bias = common_attention.attention_bias_ignore_padding(
            encoder_padding)

        # LSTM encoder.
        encoder_outputs, final_encoder_state = lstm_bid_encoder(
            inputs, inputs_length, self._hparams, train, "encoder")

        return encoder_outputs, final_encoder_state, encoder_decoder_attention_bias, inputs_length
Example #19
    def body(self, features):
        inputs = features["inputs"]
        hparams = self._hparams
        train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN
        with tf.variable_scope("lstm"):
            inputs_length = common_layers.length_from_embedding(inputs)
            inputs = common_layers.flatten4d3d(inputs)

            _, final_encoder_state = lstm.lstm(inputs,
                                               inputs_length,
                                               hparams,
                                               train,
                                               name="encoder")
            final_output = final_encoder_state[-1]
            c, h = final_output
            final_hidden_output = tf.expand_dims(h, axis=-2)
            final_hidden_output = tf.expand_dims(final_hidden_output, axis=-2)
            return final_hidden_output
Example #20
def lstm_encode(input_seq, hparams, target_space, features, name, sequence_length=None):
    if sequence_length is None:
        sequence_length = common_layers.length_from_embedding(input_seq)
    input_seq = common_layers.flatten4d3d(input_seq)
    layers = [tf.nn.rnn_cell.LSTMCell(hparams.hidden_size) for _ in range(hparams.num_hidden_layers)]
    with tf.variable_scope(name):
        # hidden_outputs (outputs of last layer) :  [batch_size, seq_len, hidden_size]
        # layer_final_output (layer-wise final outputs) : [num_hidden_layers, 2, batch_size, hidden_size]
        hidden_outputs, layer_final_output = tf.nn.dynamic_rnn(
            tf.nn.rnn_cell.MultiRNNCell(layers),
            input_seq,
            sequence_length,
            initial_state=None,
            dtype=tf.float32,
            time_major=False)
    hidden_outputs = tf.expand_dims(hidden_outputs, axis=-2)
    c, h = layer_final_output[-1]
    final_output = h
    final_output = tf.expand_dims(final_output, axis=-2)
    final_output = tf.expand_dims(final_output, axis=-2)
    return hidden_outputs, final_output
Example #21
    def render2cmd_v3_internal(self, features, hparams, train):
        # inputs and targets are both sequences with
        # shape = [batch, seq_len, 1, hparams.problem.feature_dim]
        targets = features['targets']
        losses = {}

        sampled_bottleneck = self.pretrained_visual_encoder(features, hparams)
        if hparams.sg_bottleneck:
            sampled_bottleneck = tf.stop_gradient(sampled_bottleneck)

        with tf.variable_scope('render2cmd_v3_internal'):
            # override bottleneck, or return it, if requested
            if 'bottleneck' in features:
                if common_layers.shape_list(features['bottleneck'])[0] == 0:
                    # return sampled_bottleneck,
                    # set losses['training'] = 0 so self.top() doesn't get called on it
                    return sampled_bottleneck, {'training': 0.0}
                else:
                    # we want to use the given bottleneck
                    sampled_bottleneck = features['bottleneck']

            # finalize bottleneck
            unbottleneck_dim = hparams.hidden_size * 2  # twice because using LSTM
            if hparams.twice_decoder:
                unbottleneck_dim = unbottleneck_dim * 2

            # unbottleneck back to LSTMStateTuple
            dec_initial_state = []
            for hi in range(hparams.num_hidden_layers):
                unbottleneck = self.unbottleneck(sampled_bottleneck,
                                                 unbottleneck_dim,
                                                 name_append='_{}'.format(hi))
                dec_initial_state.append(
                    rnn.LSTMStateTuple(
                        c=unbottleneck[:, :unbottleneck_dim // 2],
                        h=unbottleneck[:, unbottleneck_dim // 2:]))

            dec_initial_state = tuple(dec_initial_state)

            shifted_targets = common_layers.shift_right(targets)
            # Add 1 to account for the padding added to the left from shift_right
            targets_length = common_layers.length_from_embedding(
                shifted_targets) + 1

            # LSTM decoder
            hparams_decoder = copy.copy(hparams)
            if hparams.twice_decoder:
                hparams_decoder.hidden_size = 2 * hparams.hidden_size

            if hparams.mode == tf.estimator.ModeKeys.PREDICT:
                decoder_outputs, _ = self.lstm_decoder_infer(
                    common_layers.flatten4d3d(shifted_targets),
                    targets_length,
                    hparams_decoder,
                    features['targets_cls'],
                    train,
                    initial_state=dec_initial_state,
                    bottleneck=sampled_bottleneck)
            else:
                decoder_outputs, _ = self.lstm_decoder(
                    common_layers.flatten4d3d(shifted_targets),
                    targets_length,
                    hparams_decoder,
                    features['targets_cls'],
                    train,
                    initial_state=dec_initial_state,
                    bottleneck=sampled_bottleneck)

            ret = tf.expand_dims(decoder_outputs, axis=2)

        return ret, losses
def infer_valid_length_from_top_list(top_list, list_size):
    """Infers the number of valid (non-padding) steps in the argmaxed top list."""
    argmaxed_top_list = get_argmaxed_top_list(top_list, list_size)
    valid_length = common_layers.length_from_embedding(argmaxed_top_list)
    valid_length = tf.identity(valid_length, "output_valid_length")
    return valid_length
    def render2cmd_v3_internal(self, features, hparams, train):
        # inputs and targets are both sequences with
        # shape = [batch, seq_len, 1, hparams.problem.feature_dim]
        print(
            "render2cmd_v3_internal render2cmd_v3_internalrender2cmd_v3_internalrender2cmd_v3_internalrender2cmd_v3_internal"
        )
        all_targets = features['targets']
        all_targets_cls = features['targets_cls']
        all_targets_font_cls = features['targets_fnt']
        all_targets_psr = features['targets_psr']
        all_batch_size = common_layers.shape_list(all_targets)[0]
        batch_size = all_batch_size // 2
        sources = all_targets[:batch_size, ...]
        sources_cls = all_targets_cls[:batch_size, ...]
        sources_fnt = all_targets_font_cls[:batch_size, ...]
        sources_psr = all_targets_psr[:batch_size, ...]
        targets = all_targets[batch_size:, ...]
        targets_cls = all_targets_cls[batch_size:, ...]
        targets_fnt = all_targets_font_cls[batch_size:, ...]
        targets_psr = all_targets_psr[batch_size:, ...]

        losses = {}
        # sampled_bottleneck = self.pretrained_visual_encoder(features, hparams)

        # if hparams.sg_bottleneck:
        #     sampled_bottleneck = tf.stop_gradient(sampled_bottleneck)
        # embd = self.cls_embedding(sources_cls, sources_fnt, targets_cls, targets_fnt)
        vis_embd = self.vis_encoder(sources_psr, targets_psr, targets_cls)
        # print("embd embd embd embd embd embd embd ", embd.shape)
        print("vis embd vis embd vis embd vis embd vis", vis_embd.shape)
        sampled_bottleneck = vis_embd

        with tf.variable_scope('render2cmd_v3_internal'):
            # override bottleneck, or return it, if requested
            # if 'bottleneck' in features:
            #     if common_layers.shape_list(features['bottleneck'])[0] == 0:
            #         # return sampled_bottleneck,
            #         # set losses['training'] = 0 so self.top() doesn't get called on it
            #         print("RETURNRETURNRETURNRETURNRETURNRETURNRETURNRETURNRETURNRETURNRETURN")
            #         return sampled_bottleneck, {'training': 0.0}
            #     else:
            #         # we want to use the given bottleneck
            #         sampled_bottleneck = features['bottleneck']

            # finalize bottleneck
            unbottleneck_dim = hparams.hidden_size * 2  # twice because using LSTM
            if hparams.twice_decoder:
                unbottleneck_dim = unbottleneck_dim * 2

            dec_initial_state = []

            # LSTM encoder
            _, encoder_output_states = self.lstm_encoder(
                common_layers.flatten4d3d(sources), hparams)

            print(
                "targets shape targets shape targets shape targets shape targets shape ",
                targets.shape)
            print('run stacking...')
            print(
                "sample bottleneck shape sample bottleneck shape sample bottleneck shape ",
                sampled_bottleneck.shape)
            print(
                "sources shape sources shape sources shape sources shape sources shape",
                sources.shape)
            # input()
            for hi in range(hparams.num_hidden_layers):
                unbottleneck = self.unbottleneck(sampled_bottleneck,
                                                 unbottleneck_dim,
                                                 name_append='_{}'.format(hi))
                c, h = encoder_output_states[hi]
                # print(unbottleneck.shape)
                # print(c.shape, h.shape)
                # first_dim = common_layers.shape_list(unbottleneck)[0]
                # print(first_dim)
                # c = tf.tile(c,[first_dim,1])
                # h = tf.tile(h,[first_dim,1])
                # input()
                dec_initial_state.append(
                    tf.nn.rnn_cell.LSTMStateTuple(
                        c=tf.concat(
                            [unbottleneck[:, :unbottleneck_dim // 2], c], 1),
                        h=tf.concat(
                            [unbottleneck[:, unbottleneck_dim // 2:], h], 1)))

            dec_initial_state = tuple(dec_initial_state)
            # print('checkshape dec_initial_state')
            # print(dec_initial_state)
            # input()
            shifted_targets = common_layers.shift_right(targets)
            # Add 1 to account for the padding added to the left from shift_right
            targets_length = common_layers.length_from_embedding(
                shifted_targets) + 1

            # LSTM decoder
            hparams_decoder = copy.copy(hparams)
            if hparams.twice_decoder:
                hparams_decoder.hidden_size = 2 * hparams.hidden_size

            if hparams.mode == tf.estimator.ModeKeys.PREDICT:
                decoder_outputs, _ = self.lstm_decoder_infer(
                    common_layers.flatten4d3d(shifted_targets),
                    targets_length,
                    hparams_decoder,
                    targets_cls,
                    train,
                    initial_state=dec_initial_state,
                    bottleneck=sampled_bottleneck)
            else:
                decoder_outputs, _ = self.lstm_decoder(
                    common_layers.flatten4d3d(shifted_targets),
                    targets_length,
                    hparams_decoder,
                    targets_cls,
                    train,
                    initial_state=dec_initial_state,
                    bottleneck=sampled_bottleneck)

            ret = tf.expand_dims(decoder_outputs, axis=2)
        return ret, losses