def _preprocess(self, features):
  """Preprocesses features for multilingual translation."""
  inputs = features["inputs"]
  targets = features["targets"]
  target_tags = features["target_tags"]

  # Expand target tags to beam width, if necessary.
  if self._hparams.mode == tf_estimator.ModeKeys.PREDICT:
    # <float32> [batch_size * beam_width, 1, 1, emb_size].
    beam_width = self._hparams.beam_width
    target_tags = tf.tile(target_tags, [beam_width, 1, 1, 1])

  # Add target tags to the input sequences.
  # <float32> [batch_size, seq_len + 1, 1, emb_size].
  inputs = tf.concat([target_tags, inputs], axis=1)

  # Compute length of the input sequences.
  inputs_length = common_layers.length_from_embedding(inputs)
  inputs = common_layers.flatten4d3d(inputs)

  # Preprocess targets.
  targets = common_layers.shift_right(targets)
  # Add 1 to account for the padding added to the left from shift_right.
  targets_length = common_layers.length_from_embedding(targets) + 1
  targets = common_layers.flatten4d3d(targets)

  return inputs, inputs_length, targets, targets_length
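# All of the snippets in this section share one convention: padding time steps
# are all-zero embedding vectors, and common_layers.length_from_embedding
# recovers per-example lengths by counting the non-zero steps. The helper
# below is a minimal sketch of those semantics (consistent with
# tensor2tensor's behavior, but not the library implementation itself).
def length_from_embedding_sketch(emb):
  """Maps <float32> [batch, max_time, 1, depth] to <int32> [batch] lengths.

  A time step counts toward the length iff its embedding has any non-zero
  entry, which is how padding is represented throughout these examples.
  """
  nonpad = tf.to_float(
      tf.not_equal(tf.reduce_sum(tf.abs(emb), axis=[2, 3]), 0.0))
  return tf.cast(tf.reduce_sum(nonpad, axis=1), tf.int32)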
def lstm_seq2seq_internal(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model, main step used for training."""
  with tf.variable_scope("lstm_seq2seq"):
    if inputs is not None:
      inputs_length = common_layers.length_from_embedding(inputs)
      # Flatten inputs.
      inputs = common_layers.flatten4d3d(inputs)
      # LSTM encoder.
      inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
      _, final_encoder_state = lstm(inputs, inputs_length, hparams, train,
                                    "encoder")
    else:
      final_encoder_state = None
    # LSTM decoder.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    decoder_outputs, _ = lstm(
        common_layers.flatten4d3d(shifted_targets),
        targets_length,
        hparams,
        train,
        "decoder",
        initial_state=final_encoder_state)
    return tf.expand_dims(decoder_outputs, axis=2)
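# Why the recurring "+ 1": shift_right pads a zero vector on the left and
# drops the last time step, and that zero is indistinguishable from padding,
# so length_from_embedding undercounts the decoder input by one step. A toy
# check of this bookkeeping (hypothetical shapes, assuming the zero-padding
# convention sketched above):
def _shift_right_length_demo():
  # One sequence with 3 real steps, padded to max_time=5, depth=4.
  emb = tf.concat([tf.ones([1, 3, 1, 4]), tf.zeros([1, 2, 1, 4])], axis=1)
  shifted = common_layers.shift_right(emb)
  # The leading zero (the GO step) is not counted by length_from_embedding,
  # so "+ 1" restores the 4 decoder input steps: GO plus the 3 real steps.
  lengths = common_layers.length_from_embedding(shifted) + 1
  with tf.Session() as sess:
    print(sess.run(lengths))  # [4]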
def lstm_seq2seq_internal_bid_encoder(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model with bidirectional encoder."""
  with tf.variable_scope("lstm_seq2seq_bid_encoder"):
    if inputs is not None:
      inputs_length = common_layers.length_from_embedding(inputs)
      # Flatten inputs.
      inputs = common_layers.flatten4d3d(inputs)
      # LSTM encoder.
      _, final_encoder_state = lstm_bid_encoder(
          inputs, inputs_length, hparams, train, "encoder")
    else:
      inputs_length = None
      final_encoder_state = None
    # LSTM decoder.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    # The decoder is twice as wide because the bidirectional encoder
    # concatenates forward and backward states.
    hparams_decoder = copy.copy(hparams)
    hparams_decoder.hidden_size = 2 * hparams.hidden_size
    decoder_outputs, _ = lstm(
        common_layers.flatten4d3d(shifted_targets),
        targets_length,
        hparams_decoder,
        train,
        "decoder",
        initial_state=final_encoder_state)
    return tf.expand_dims(decoder_outputs, axis=2)
def lstm_seq2seq_internal_attention(inputs, targets, hparams, train):
  """LSTM seq2seq model with attention, main step used for training."""
  with tf.variable_scope("lstm_seq2seq_attention"):
    # This is a temporary fix for varying-length sequences within a batch.
    # A more complete fix should pass a length tensor from outside so that
    # all the lstm variants can use it.
    inputs_length = common_layers.length_from_embedding(inputs)
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)
    # LSTM encoder.
    encoder_outputs, final_encoder_state = lstm(
        inputs, hparams, train, "encoder", sequence_length=inputs_length)
    # LSTM decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    decoder_outputs, _ = lstm_attention_decoder(
        common_layers.flatten4d3d(shifted_targets),
        hparams,
        train,
        "decoder",
        final_encoder_state,
        encoder_outputs,
        encoder_output_length=inputs_length,
        decoder_input_length=targets_length)
    return tf.expand_dims(decoder_outputs, axis=2)
def _build_inputs_and_targets(
    self, from_seqs=None, from_tags=None, to_seqs=None, to_tags=None):
  """Given from and to sequences and tags, construct inputs and targets."""
  del from_tags  # Unused.
  if from_seqs is not None:
    inputs = from_seqs
    inputs_length = common_layers.length_from_embedding(inputs)
    if to_tags is not None:
      # Add to-tags to the inputs and adjust lengths.
      # <float32> [batch_size, seq_len + 1, 1, emb_size].
      inputs = tf.concat([to_tags, inputs], axis=1)
      inputs_length = inputs_length + 1
    inputs = common_layers.flatten4d3d(inputs)
  else:
    inputs = None
    inputs_length = None

  if to_seqs is not None:
    # Shift to-sequences to form targets.
    # <float32> [batch_size, seq_len, 1, emb_size].
    targets = common_layers.shift_right(to_seqs)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(targets) + 1
    targets = common_layers.flatten4d3d(targets)
  else:
    targets = None
    targets_length = None

  return (inputs, inputs_length), (targets, targets_length)
def nested_list_operations(input_seq, action_seq, hp, name=""):
  """Runs a NestedListOperationCell over concatenated actions and inputs."""
  batch_size, _, _, hidden_size = common_layers.shape_list(input_seq)
  cell = NestedListOperationCell(
      hidden_size, list_size=hp.list_size, num_lists=hp.num_lists)
  sequence_length = common_layers.length_from_embedding(input_seq)
  sequence_length = tf.identity(sequence_length, "sequence_length")
  # Concatenate actions and inputs along the embedding axis and drop the
  # singleton dimension expected by dynamic_rnn.
  cell_input = tf.concat([action_seq, input_seq], axis=-1)
  cell_input = tf.squeeze(cell_input, axis=2)
  cell_input = tf.identity(cell_input, "cell_input")
  initial_state = tf.zeros(
      shape=[batch_size, cell.state_size], dtype=tf.float32)
  with tf.variable_scope(name):
    _, final_states = tf.nn.dynamic_rnn(
        cell,
        cell_input,
        sequence_length,
        initial_state=initial_state,
        dtype=tf.float32,
        time_major=False)
  # Reshape the flat final state into a [lists x list slots x hidden] grid.
  grid_structured_states = tf.reshape(
      final_states, [-1, cell.num_lists, cell.list_size, hidden_size])
  grid_structured_states = tf.identity(grid_structured_states,
                                       "grid_structured_states")
  return grid_structured_states
def bid_gru_encode(input_seq, hparams, target_space, features, name,
                   sequence_length=None):
  """Bidirectional multi-layer GRU encoder."""
  if sequence_length is None:
    sequence_length = common_layers.length_from_embedding(input_seq)
  input_seq = common_layers.flatten4d3d(input_seq)
  with tf.variable_scope(name):
    cell_fw = [tf.nn.rnn_cell.GRUCell(hparams.hidden_size)
               for _ in range(hparams.num_hidden_layers)]
    cell_bw = [tf.nn.rnn_cell.GRUCell(hparams.hidden_size)
               for _ in range(hparams.num_hidden_layers)]
    ((encoder_fw_outputs, encoder_bw_outputs),
     (encoder_fw_state, encoder_bw_state)) = tf.nn.bidirectional_dynamic_rnn(
         tf.nn.rnn_cell.MultiRNNCell(cell_fw),
         tf.nn.rnn_cell.MultiRNNCell(cell_bw),
         input_seq,
         sequence_length,
         initial_state_fw=None,
         initial_state_bw=None,
         dtype=tf.float32,
         time_major=False)
    # Concatenate forward and backward outputs/states along the depth axis.
    encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)
    encoder_outputs = tf.expand_dims(encoder_outputs, axis=-2)
    final_output = tf.concat((encoder_fw_state[-1], encoder_bw_state[-1]), -1)
    final_output = tf.expand_dims(final_output, axis=-2)
    final_output = tf.expand_dims(final_output, axis=-2)
    return encoder_outputs, final_output
def create_model_encode_decode(self, inputs, y_id):
  """Encodes `inputs` and decodes `y_id` with an LSTM seq2seq model.

  inputs: <float32> [batch, step, 1, hidden]; y_id: <int> [batch, step, 1, 1].
  """
  hparams = self.hparams
  train_flag = self.train_flag
  vocab_size = self.vocabsz
  embeddings_y = self.embeddings_y
  with tf.variable_scope("foo", reuse=tf.AUTO_REUSE):
    # Embed the target ids: [batch, step, 1, hidden].
    y = tf.nn.embedding_lookup(embeddings_y, y_id)
    y = tf.squeeze(y, axis=3)
    if len(inputs.shape) == 2:  # [batch, hidden] -> [batch, 1, 1, hidden].
      inputs = tf.expand_dims(tf.expand_dims(inputs, axis=1), axis=1)
    inputs_length = common_layers.length_from_embedding(inputs)
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)
    # LSTM encoder; the final state must be an LSTMStateTuple.
    inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
    _, final_encoder_state = lstm_yr(inputs, inputs_length, hparams,
                                     train_flag, "encoder")
    # LSTM decoder: e.g. [46, 23, 78] -> [0, 46, 23].
    shifted_targets = common_layers.shift_right(y)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    decoder_outputs, _ = lstm_yr(
        common_layers.flatten4d3d(shifted_targets),
        targets_length,
        hparams,
        train_flag,
        "decoder",
        initial_state=final_encoder_state)
    # Project decoder outputs to the vocabulary: [batch, step, vocab_size].
    decoder_outputs = tf.layers.dense(inputs=decoder_outputs, units=vocab_size)
    decoder_outputs = self.tensor3dto4d(decoder_outputs)
    return decoder_outputs
def target_reversing_and_padding(target_seq, list_size):
  """Reverses each target sequence and pads/crops it to `list_size` steps."""
  targets_length = common_layers.length_from_embedding(target_seq)
  flipped_target_seq = tf.reverse_sequence(
      target_seq, targets_length, seq_axis=1)
  flipped_target_seq = tf.pad(flipped_target_seq,
                              [[0, 0], [0, list_size], [0, 0], [0, 0]])
  flipped_target_seq = flipped_target_seq[:, :list_size, :, :]
  flipped_target_seq = tf.identity(flipped_target_seq, "flipped_target_seq")
  return flipped_target_seq
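# A quick usage sketch for target_reversing_and_padding with hypothetical
# inputs: given 3 valid steps [a, b, c] and list_size=4, reverse_sequence
# flips only the valid prefix, and the pad-then-slice pair forces the time
# axis to exactly list_size steps.
def _target_reversing_demo():
  import numpy as np  # assumed available alongside TF1
  emb = np.zeros((1, 5, 1, 2), dtype=np.float32)
  emb[0, :3, 0, :] = [[1, 1], [2, 2], [3, 3]]  # steps a, b, c
  flipped = target_reversing_and_padding(tf.constant(emb), list_size=4)
  with tf.Session() as sess:
    # Prints [3. 2. 1. 0.]: reversed valid steps, then padding.
    print(sess.run(flipped)[0, :, 0, 0])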
def body(self, features):
  inputs = features["inputs"]
  train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN
  (encoder_outputs, final_encoder_state, encoder_decoder_attention_bias,
   inputs_length) = self.encode(inputs, self._hparams)

  if "targets_actions" in features:
    targets = features["targets_actions"]
  else:
    tf.logging.warn(
        "CopySeq2Seq must be used with a SemanticParsing problem with a "
        "ShiftReduceGrammar; bad things will happen otherwise")
    targets = features["targets"]

  # LSTM decoder with attention.
  shifted_targets = common_layers.shift_right(targets)
  # Add 1 to account for the padding added to the left from shift_right.
  targets_length = common_layers.length_from_embedding(shifted_targets) + 1
  shifted_targets = common_layers.flatten4d3d(shifted_targets)
  # The decoder is twice as wide to match the bidirectional encoder.
  hparams_decoder = copy.copy(self._hparams)
  hparams_decoder.hidden_size = 2 * self._hparams.hidden_size
  decoder_output = lstm_attention_decoder(shifted_targets, hparams_decoder,
                                          train, "decoder",
                                          final_encoder_state, encoder_outputs,
                                          inputs_length, targets_length)
  decoder_output = tf.expand_dims(decoder_output, axis=2)

  body_output = dict()
  target_modality = (
      self._problem_hparams.target_modality
      if self._problem_hparams else {"targets": None})
  assert self._hparams.pointer_layer in ("attentive", "decaying_attentive")
  for key, modality in target_modality.items():
    if isinstance(modality, CopyModality):
      with tf.variable_scope("copy_layer/" + key):
        if self._hparams.pointer_layer == "decaying_attentive":
          output_layer = DecayingAttentivePointerLayer(encoder_outputs)
        else:
          output_layer = AttentivePointerLayer(encoder_outputs)
        scores = output_layer(decoder_output)
        scores += encoder_decoder_attention_bias
        body_output[key] = scores
    else:
      body_output[key] = decoder_output
  return body_output
def lstm_seq2seq_internal_attention_bid_encoder(inputs, targets, hparams,
                                                train):
  """LSTM seq2seq model with attention, main step used for training."""
  with tf.variable_scope("lstm_seq2seq_attention_bid_encoder"):
    inputs_length = common_layers.length_from_embedding(inputs)
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)
    # LSTM encoder.
    encoder_outputs, final_encoder_state = lstm_bid_encoder(
        inputs, inputs_length, hparams, train, "encoder")
    # LSTM decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    hparams_decoder = copy.copy(hparams)
    hparams_decoder.hidden_size = 2 * hparams.hidden_size
    decoder_outputs = lstm_attention_decoder(
        common_layers.flatten4d3d(shifted_targets), hparams_decoder, train,
        "decoder", final_encoder_state, encoder_outputs, inputs_length,
        targets_length)
    return tf.expand_dims(decoder_outputs, axis=2)
def body(self, features):
  if self._hparams.initializer == "orthogonal":
    raise ValueError("LSTM models fail with orthogonal initializer.")
  train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN
  inputs = features.get("inputs")
  inputs_length = common_layers.length_from_embedding(inputs)
  # Flatten inputs.
  inputs = common_layers.flatten4d3d(inputs)
  # LSTM encoder.
  inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
  encoder_output, _ = lstm(inputs, inputs_length, self._hparams, train,
                           "encoder")
  return tf.expand_dims(encoder_output, axis=2)
def _build_lm_inputs(self, features):
  """Builds inputs and targets for LM training."""
  targets = features["targets"]
  target_tags = features["target_tags"]

  if self._hparams.mode == tf.estimator.ModeKeys.PREDICT:
    target_tags = tf.tile(target_tags, [self._hparams.beam_width, 1, 1, 1])

  # Construct LM inputs by shifting targets right and filling the first
  # position with the target-language tag embedding.
  inputs = common_layers.shift_right(targets, pad_value=target_tags)
  inputs_length = common_layers.length_from_embedding(targets) + 1
  inputs = common_layers.flatten4d3d(inputs)
  return inputs, inputs_length
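# shift_right also accepts a pad_value, which _build_lm_inputs uses to start
# each LM input with the target-language tag instead of a zero vector. With
# target_tags of shape [batch, 1, 1, emb_size], the call above is equivalent
# to this one-liner (a sketch of the semantics, not the library code):
#
#   inputs = tf.concat([target_tags, targets], axis=1)[:, :-1, :, :]
#
# The "+ 1" then counts the prepended tag step, mirroring the shift_right
# bookkeeping used elsewhere in this section.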
def lstm_seq2seq_internal_attention(inputs, targets, hparams, train):
  """LSTM seq2seq model with attention, main step used for training."""
  with tf.variable_scope("lstm_seq2seq_attention"):
    # This is a temporary fix for varying-length sequences within a batch.
    # A more complete fix should pass a length tensor from outside so that
    # all the lstm variants can use it.
    inputs_length = common_layers.length_from_embedding(inputs)
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)
    # LSTM encoder.
    inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
    encoder_outputs, final_encoder_state = lstm(
        inputs, inputs_length, hparams, train, "encoder")
    # LSTM decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    decoder_outputs = lstm_attention_decoder(
        common_layers.flatten4d3d(shifted_targets), hparams, train, "decoder",
        final_encoder_state, encoder_outputs, inputs_length, targets_length)
    return tf.expand_dims(decoder_outputs, axis=2)
def encode(self, inputs, hparams, features=None):
  train = hparams.mode == tf.estimator.ModeKeys.TRAIN
  inputs_length = common_layers.length_from_embedding(inputs)
  # Flatten inputs.
  inputs = common_layers.flatten4d3d(inputs)
  encoder_padding = common_attention.embedding_to_padding(inputs)
  encoder_decoder_attention_bias = (
      common_attention.attention_bias_ignore_padding(encoder_padding))
  # LSTM encoder.
  encoder_outputs, final_encoder_state = lstm_bid_encoder(
      inputs, inputs_length, self._hparams, train, "encoder")
  return (encoder_outputs, final_encoder_state,
          encoder_decoder_attention_bias, inputs_length)
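# Note on the attention bias above: common_attention.embedding_to_padding
# marks all-zero embedding steps as padding (1.0), and
# attention_bias_ignore_padding turns that mask into a large negative bias
# that is added to attention logits so the copy/pointer layers in the body()
# example never attend to padding. Shape sketch (hypothetical batch B and
# input length L):
#
#   encoder_padding                 # <float32> [B, L], 1.0 at padding steps
#   encoder_decoder_attention_bias  # <float32> [B, 1, 1, L], ~-1e9 at padding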
def body(self, features):
  inputs = features["inputs"]
  hparams = self._hparams
  train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN
  with tf.variable_scope("lstm"):
    inputs_length = common_layers.length_from_embedding(inputs)
    inputs = common_layers.flatten4d3d(inputs)
    _, final_encoder_state = lstm.lstm(inputs, inputs_length, hparams, train,
                                       name="encoder")
    # Keep only the hidden state of the top layer's LSTMStateTuple.
    c, h = final_encoder_state[-1]
    final_hidden_output = tf.expand_dims(h, axis=-2)
    final_hidden_output = tf.expand_dims(final_hidden_output, axis=-2)
    return final_hidden_output
def lstm_encode(input_seq, hparams, target_space, features, name,
                sequence_length=None):
  """Multi-layer LSTM encoder."""
  if sequence_length is None:
    sequence_length = common_layers.length_from_embedding(input_seq)
  input_seq = common_layers.flatten4d3d(input_seq)
  layers = [tf.nn.rnn_cell.LSTMCell(hparams.hidden_size)
            for _ in range(hparams.num_hidden_layers)]
  with tf.variable_scope(name):
    # hidden_outputs (outputs of the last layer): [batch_size, seq_len,
    # hidden_size]. layer_final_output (layer-wise final states): a tuple of
    # num_hidden_layers LSTMStateTuples, each with (c, h) of shape
    # [batch_size, hidden_size].
    hidden_outputs, layer_final_output = tf.nn.dynamic_rnn(
        tf.nn.rnn_cell.MultiRNNCell(layers),
        input_seq,
        sequence_length,
        initial_state=None,
        dtype=tf.float32,
        time_major=False)
    hidden_outputs = tf.expand_dims(hidden_outputs, axis=-2)
    # Use the hidden state of the top layer as the sequence summary.
    c, h = layer_final_output[-1]
    final_output = h
    final_output = tf.expand_dims(final_output, axis=-2)
    final_output = tf.expand_dims(final_output, axis=-2)
    return hidden_outputs, final_output
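# For reference: with MultiRNNCell wrapping N LSTMCells, the second return
# value of tf.nn.dynamic_rnn is a tuple of N LSTMStateTuples, so
# layer_final_output[-1] above is the (c, h) pair of the top layer. Shape
# sketch (hypothetical batch_size B and hidden_size H):
#
#   layer_final_output        # tuple of N LSTMStateTuple(c, h)
#   layer_final_output[-1].c  # <float32> [B, H]
#   layer_final_output[-1].h  # <float32> [B, H]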
def render2cmd_v3_internal(self, features, hparams, train):
  # inputs and targets are both sequences with
  # shape = [batch, seq_len, 1, hparams.problem.feature_dim]
  targets = features['targets']
  losses = {}

  sampled_bottleneck = self.pretrained_visual_encoder(features, hparams)
  if hparams.sg_bottleneck:
    sampled_bottleneck = tf.stop_gradient(sampled_bottleneck)

  with tf.variable_scope('render2cmd_v3_internal'):
    # Override the bottleneck, or return it, if requested.
    if 'bottleneck' in features:
      if common_layers.shape_list(features['bottleneck'])[0] == 0:
        # Return the sampled bottleneck; set losses['training'] = 0 so
        # self.top() doesn't get called on it.
        return sampled_bottleneck, {'training': 0.0}
      else:
        # We want to use the given bottleneck.
        sampled_bottleneck = features['bottleneck']

    # Finalize the bottleneck.
    unbottleneck_dim = hparams.hidden_size * 2  # twice because using LSTM
    if hparams.twice_decoder:
      unbottleneck_dim = unbottleneck_dim * 2

    # Unbottleneck back to a per-layer LSTMStateTuple.
    dec_initial_state = []
    for hi in range(hparams.num_hidden_layers):
      unbottleneck = self.unbottleneck(
          sampled_bottleneck, unbottleneck_dim, name_append='_{}'.format(hi))
      dec_initial_state.append(
          rnn.LSTMStateTuple(
              c=unbottleneck[:, :unbottleneck_dim // 2],
              h=unbottleneck[:, unbottleneck_dim // 2:]))
    dec_initial_state = tuple(dec_initial_state)

    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1

    # LSTM decoder.
    hparams_decoder = copy.copy(hparams)
    if hparams.twice_decoder:
      hparams_decoder.hidden_size = 2 * hparams.hidden_size

    if hparams.mode == tf.estimator.ModeKeys.PREDICT:
      decoder_outputs, _ = self.lstm_decoder_infer(
          common_layers.flatten4d3d(shifted_targets),
          targets_length,
          hparams_decoder,
          features['targets_cls'],
          train,
          initial_state=dec_initial_state,
          bottleneck=sampled_bottleneck)
    else:
      decoder_outputs, _ = self.lstm_decoder(
          common_layers.flatten4d3d(shifted_targets),
          targets_length,
          hparams_decoder,
          features['targets_cls'],
          train,
          initial_state=dec_initial_state,
          bottleneck=sampled_bottleneck)

    ret = tf.expand_dims(decoder_outputs, axis=2)
  return ret, losses
def infer_valid_length_from_top_list(top_list, list_size):
  """Infers the number of valid (non-padding) entries in the decoded list."""
  argmaxed_top_list = get_argmaxed_top_list(top_list, list_size)
  valid_length = common_layers.length_from_embedding(argmaxed_top_list)
  valid_length = tf.identity(valid_length, "output_valid_length")
  return valid_length
def render2cmd_v3_internal(self, features, hparams, train):
  # inputs and targets are both sequences with
  # shape = [batch, seq_len, 1, hparams.problem.feature_dim]
  all_targets = features['targets']
  all_targets_cls = features['targets_cls']
  all_targets_font_cls = features['targets_fnt']
  all_targets_psr = features['targets_psr']

  # The first half of the batch holds the source glyphs, the second half the
  # target glyphs.
  all_batch_size = common_layers.shape_list(all_targets)[0]
  batch_size = all_batch_size // 2
  sources = all_targets[:batch_size, ...]
  sources_cls = all_targets_cls[:batch_size, ...]
  sources_fnt = all_targets_font_cls[:batch_size, ...]
  sources_psr = all_targets_psr[:batch_size, ...]
  targets = all_targets[batch_size:, ...]
  targets_cls = all_targets_cls[batch_size:, ...]
  targets_fnt = all_targets_font_cls[batch_size:, ...]
  targets_psr = all_targets_psr[batch_size:, ...]
  losses = {}

  # The visual embedding replaces the pretrained bottleneck:
  # sampled_bottleneck = self.pretrained_visual_encoder(features, hparams)
  # if hparams.sg_bottleneck:
  #   sampled_bottleneck = tf.stop_gradient(sampled_bottleneck)
  # embd = self.cls_embedding(sources_cls, sources_fnt, targets_cls,
  #                           targets_fnt)
  vis_embd = self.vis_encoder(sources_psr, targets_psr, targets_cls)
  sampled_bottleneck = vis_embd

  with tf.variable_scope('render2cmd_v3_internal'):
    # Finalize the bottleneck.
    unbottleneck_dim = hparams.hidden_size * 2  # twice because using LSTM
    if hparams.twice_decoder:
      unbottleneck_dim = unbottleneck_dim * 2
    dec_initial_state = []

    # LSTM encoder over the source sequence.
    _, encoder_output_states = self.lstm_encoder(
        common_layers.flatten4d3d(sources), hparams)

    # Concatenate the unbottlenecked embedding with the encoder state to form
    # the decoder's initial state, layer by layer.
    for hi in range(hparams.num_hidden_layers):
      unbottleneck = self.unbottleneck(
          sampled_bottleneck, unbottleneck_dim, name_append='_{}'.format(hi))
      c, h = encoder_output_states[hi]
      dec_initial_state.append(
          tf.nn.rnn_cell.LSTMStateTuple(
              c=tf.concat([unbottleneck[:, :unbottleneck_dim // 2], c], 1),
              h=tf.concat([unbottleneck[:, unbottleneck_dim // 2:], h], 1)))
    dec_initial_state = tuple(dec_initial_state)

    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1

    # LSTM decoder.
    hparams_decoder = copy.copy(hparams)
    if hparams.twice_decoder:
      hparams_decoder.hidden_size = 2 * hparams.hidden_size

    if hparams.mode == tf.estimator.ModeKeys.PREDICT:
      decoder_outputs, _ = self.lstm_decoder_infer(
          common_layers.flatten4d3d(shifted_targets),
          targets_length,
          hparams_decoder,
          targets_cls,
          train,
          initial_state=dec_initial_state,
          bottleneck=sampled_bottleneck)
    else:
      decoder_outputs, _ = self.lstm_decoder(
          common_layers.flatten4d3d(shifted_targets),
          targets_length,
          hparams_decoder,
          targets_cls,
          train,
          initial_state=dec_initial_state,
          bottleneck=sampled_bottleneck)

    ret = tf.expand_dims(decoder_outputs, axis=2)
  return ret, losses