def lstm_seq2seq_internal(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model, main step used for training."""
  with tf.variable_scope("lstm_seq2seq"):
    if inputs is not None:
      inputs_length = common_layers.length_from_embedding(inputs)
      # Flatten inputs.
      inputs = common_layers.flatten4d3d(inputs)

      # LSTM encoder.
      inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
      _, final_encoder_state = lstm(inputs, inputs_length, hparams, train,
                                    "encoder")
    else:
      final_encoder_state = None

    # LSTM decoder.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    decoder_outputs, _ = lstm(
        common_layers.flatten4d3d(shifted_targets),
        targets_length,
        hparams,
        train,
        "decoder",
        initial_state=final_encoder_state)
    return tf.expand_dims(decoder_outputs, axis=2)
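# A minimal sketch (not from the original source) of the pattern the functions
# in this file share: shift the targets right to build decoder inputs, then add
# 1 to the embedding-based length because the prepended all-zero GO frame is
# invisible to length_from_embedding. Assumes the tensor2tensor library is
# installed; the toy shapes are illustrative only.
import numpy as np
import tensorflow as tf
from tensor2tensor.layers import common_layers

example_targets = np.zeros((1, 5, 1, 4), dtype=np.float32)  # [batch, len, 1, emb]
example_targets[0, :3] = 1.0                                # 3 real frames, 2 padding

example_shifted = common_layers.shift_right(tf.constant(example_targets))
# length_from_embedding counts non-zero frames -> 3; the +1 restores the GO
# frame, giving the true decoder input length of 4.
example_length = common_layers.length_from_embedding(example_shifted) + 1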
def lstm_seq2seq_internal_attention(inputs, targets, hparams, train):
  """LSTM seq2seq model with attention, main step used for training."""
  with tf.variable_scope("lstm_seq2seq_attention"):
    # This is a temporary fix for varying-length sequences within a batch.
    # A more complete fix should pass a length tensor from outside so that
    # all the lstm variants can use it.
    inputs_length = common_layers.length_from_embedding(inputs)
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)

    # LSTM encoder.
    encoder_outputs, final_encoder_state = lstm(
        inputs, hparams, train, "encoder", sequence_length=inputs_length)

    # LSTM decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    decoder_outputs, _ = lstm_attention_decoder(
        common_layers.flatten4d3d(shifted_targets),
        hparams,
        train,
        "decoder",
        final_encoder_state,
        encoder_outputs,
        encoder_output_length=inputs_length,
        decoder_input_length=targets_length)
    return tf.expand_dims(decoder_outputs, axis=2)
def body(self, features): """Build the main body of the model. Args: features: A dict of "inputs" and "targets" which have already been passed through an embedding layer. Inputs should have shape [batch_size, max_seq_length, 1, embedding_size]. Targets should have shape [batch_size, max_seq_length, 1, 1] Returns: The logits which get passed to the top of the model for inference. A tensor of shape [batch_size, seq_length, 1, embedding_size] """ inputs = features.get("inputs") targets = features["targets"] if inputs is not None: inputs = common_layers.flatten4d3d(inputs) _, final_encoder_state = self._rnn(tf.reverse(inputs, axis=[1]), "encoder") else: final_encoder_state = None shifted_targets = common_layers.shift_right(targets) decoder_outputs, _ = self._rnn( common_layers.flatten4d3d(shifted_targets), "decoder", initial_state=final_encoder_state) return decoder_outputs
def _preprocess(self, features):
  """Preprocesses features for multilingual translation."""
  inputs = features["inputs"]
  targets = features["targets"]
  target_tags = features["target_tags"]

  # Expand target tags to beam width, if necessary.
  if self._hparams.mode == tf_estimator.ModeKeys.PREDICT:
    # <float32> [batch_size * beam_width, 1, 1, emb_size].
    beam_width = self._hparams.beam_width
    target_tags = tf.tile(target_tags, [beam_width, 1, 1, 1])

  # Add target tags to the input sequences.
  # <float32> [batch_size, seq_len + 1, 1, emb_size].
  inputs = tf.concat([target_tags, inputs], axis=1)

  # Compute length of the input sequences.
  inputs_length = common_layers.length_from_embedding(inputs)
  inputs = common_layers.flatten4d3d(inputs)

  # Preprocess targets.
  targets = common_layers.shift_right(targets)
  # Add 1 to account for the padding added to the left from shift_right.
  targets_length = common_layers.length_from_embedding(targets) + 1
  targets = common_layers.flatten4d3d(targets)

  return inputs, inputs_length, targets, targets_length
def lstm_seq2seq_internal_bid_encoder(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model with bidirectional encoder."""
  with tf.variable_scope("lstm_seq2seq_bid_encoder"):
    if inputs is not None:
      inputs_length = common_layers.length_from_embedding(inputs)
      # Flatten inputs.
      inputs = common_layers.flatten4d3d(inputs)
      # LSTM encoder.
      _, final_encoder_state = lstm_bid_encoder(inputs, inputs_length, hparams,
                                                train, "encoder")
    else:
      inputs_length = None
      final_encoder_state = None

    # LSTM decoder.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    hparams_decoder = copy.copy(hparams)
    hparams_decoder.hidden_size = 2 * hparams.hidden_size
    decoder_outputs, _ = lstm(
        common_layers.flatten4d3d(shifted_targets),
        targets_length,
        hparams_decoder,
        train,
        "decoder",
        initial_state=final_encoder_state)
    return tf.expand_dims(decoder_outputs, axis=2)
def body(self, features):
  if self._hparams.initializer == "orthogonal":
    raise ValueError("LSTM models fail with orthogonal initializer.")
  train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN

  inputs = features["targets"]
  encoder_outputs = common_layers.flatten4d3d(inputs)
  shifted_targets = common_layers.shift_right(inputs)
  final_encoder_state = None

  # Embedding is assumed to be handled by the problem definition.
  # Flatten inputs.
  inputs = common_layers.flatten4d3d(shifted_targets)

  # LSTM decoder. Alternative decoders that were tried here:
  #   lstm_attention_decoder(inputs, self._hparams, train, "decoder",
  #                          final_encoder_state, encoder_outputs)
  #   LSTM_custom(inputs, self._hparams, train, "decoder",
  #               final_encoder_state, encoder_outputs)[0]
  #   lstm(inputs, self._hparams, train, "decoder")
  decoder_output, _ = lstm_SA(inputs, self._hparams, train, "decoder")
  return tf.expand_dims(decoder_output, axis=2)
def _build_inputs_and_targets(
    self, from_seqs=None, from_tags=None, to_seqs=None, to_tags=None):
  """Given from and to sequences and tags, construct inputs and targets."""
  del from_tags  # Unused.

  if from_seqs is not None:
    inputs = from_seqs
    inputs_length = common_layers.length_from_embedding(inputs)
    if to_tags is not None:
      # Add to-tags to the inputs and adjust lengths.
      # <float32> [batch_size, seq_len + 1, 1, emb_size].
      inputs = tf.concat([to_tags, inputs], axis=1)
      inputs_length = inputs_length + 1
    inputs = common_layers.flatten4d3d(inputs)
  else:
    inputs = None
    inputs_length = None

  if to_seqs is not None:
    # Shift to-sequences to form targets.
    # <float32> [batch_size, seq_len, 1, emb_size].
    targets = common_layers.shift_right(to_seqs)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(targets) + 1
    targets = common_layers.flatten4d3d(targets)
  else:
    targets = None
    targets_length = None

  return (inputs, inputs_length), (targets, targets_length)
def bytenet_internal(inputs, targets, hparams):
  """ByteNet, main step used for training."""
  with tf.variable_scope("bytenet"):
    # Flatten inputs and extend length by 50%.
    inputs = tf.expand_dims(common_layers.flatten4d3d(inputs), axis=2)
    extend_length = tf.to_int32(0.5 * tf.to_float(tf.shape(inputs)[1]))
    inputs_shape = inputs.shape.as_list()
    inputs = tf.pad(inputs, [[0, 0], [0, extend_length], [0, 0], [0, 0]])
    inputs_shape[1] = None
    inputs.set_shape(inputs_shape)  # Don't lose the other shapes when padding.

    # Pad inputs and targets to be the same length, divisible by 50.
    inputs, targets = common_layers.pad_to_same_length(
        inputs, targets, final_length_divisible_by=50)
    final_encoder = residual_dilated_conv(inputs, hparams.num_block_repeat,
                                          "SAME", "encoder", hparams)

    shifted_targets = common_layers.shift_right(targets)
    kernel = (hparams.kernel_height, hparams.kernel_width)
    decoder_start = common_layers.conv_block(
        tf.concat([final_encoder, shifted_targets], axis=3),
        hparams.hidden_size, [((1, 1), kernel)],
        padding="LEFT")

    return residual_dilated_conv(decoder_start, hparams.num_block_repeat,
                                 "LEFT", "decoder", hparams)
def lstm_seq2seq_internal_dynamic(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model, main step used for training."""
  with tf.variable_scope("lstm_seq2seq"):
    if inputs is not None:
      # Flatten inputs.
      inputs = common_layers.flatten4d3d(inputs)
      # LSTM encoder.
      _, final_encoder_state = lstm(
          tf.reverse(inputs, axis=[1]), hparams, train, "encoder")
    else:
      final_encoder_state = None

    # LSTM decoder.
    shifted_targets = common_layers.shift_right(targets)
    decoder_outputs, _ = lstm(
        common_layers.flatten4d3d(shifted_targets),
        hparams,
        train,
        "decoder",
        initial_state=final_encoder_state)

    # Project the outputs.
    with tf.variable_scope("projection"):
      projected_outputs = tf.layers.dense(
          decoder_outputs, 2048, activation=None, use_bias=False)
    return tf.expand_dims(projected_outputs, axis=2)
def testShiftRight(self):
  x1 = np.zeros((5, 7, 1, 11))
  x1[:, 0, :] = np.ones_like(x1[:, 0, :])
  expected = np.zeros((5, 7, 1, 11))
  expected[:, 1, :] = np.ones_like(expected[:, 1, :])
  a = common_layers.shift_right(tf.constant(x1, dtype=tf.float32))
  actual = self.evaluate(a)
  self.assertAllEqual(actual, expected)
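# The test above pins down the semantics of common_layers.shift_right on a
# 4-D [batch, length, 1, depth] tensor: prepend one all-zero frame on the
# length axis and drop the last frame. A minimal pure-TensorFlow sketch of
# that behaviour (an illustration, not the library's own code):
import tensorflow as tf

def shift_right_sketch(x):
  """Shifts a [batch, length, 1, depth] tensor right by one step."""
  padded = tf.pad(x, [[0, 0], [1, 0], [0, 0], [0, 0]])  # zero frame in front
  return padded[:, :-1, :, :]                           # drop the last frame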
def infer_step(result, length):
  """Inference step."""

  def print_info(result, length, new_length):
    vocab = self._hparams.problem_hparams.vocabulary["targets"]
    tf.logging.info(
        "length=%s new_length=%s length_diff=%s new_suffix=%s",
        length,
        new_length,
        new_length - length,
        str([
            vocab._subtoken_id_to_subtoken_string(index)  # pylint: disable=protected-access
            for index in result[0, -block_size:, 0, 0][:new_length - length]
        ]).decode("unicode-escape"),
    )

  features["targets"] = tf.pad(result, [[0, 0], [0, 1], [0, 0], [0, 0]])
  samples, logits, losses = self.sample(features)  # pylint: disable=unused-variable

  _, top_k_indices = tf.nn.top_k(
      logits[:, :-1, :1, :, :], k=self._decode_hparams.guess_and_check_top_k)
  in_top_k = tf.reduce_any(
      tf.equal(tf.to_int64(top_k_indices), tf.expand_dims(result, 4)), axis=4)

  eos_cumsum = tf.cumsum(
      tf.to_int32(tf.equal(result, text_encoder.EOS_ID)), axis=1)
  after_eos = tf.greater(common_layers.shift_right(eos_cumsum), 0)

  correct = tf.logical_and(in_top_k, tf.logical_not(after_eos))
  correct_cumsum = tf.cumsum(tf.to_int32(correct), axis=1)
  perfect_cumsum = 1 + tf.range(tf.shape(correct)[1])
  for axis in [0, 2, 3]:
    perfect_cumsum = tf.expand_dims(perfect_cumsum, axis=axis)

  new_length = tf.reduce_sum(
      tf.to_int32(tf.equal(correct_cumsum, perfect_cumsum)), axis=1)
  new_length = tf.squeeze(new_length, axis=[0, 1, 2])
  new_length = tf.minimum(new_length, decode_length)

  new_result = tf.concat([
      result[:, :new_length, :, :],
      tf.reshape(samples[:, new_length, :block_size, :],
                 [1, block_size, 1, 1])
  ], axis=1)

  with tf.control_dependencies(
      [tf.py_func(print_info, [result, length, new_length], [])]):
    new_result = tf.identity(new_result)

  return new_result, new_length
def body(self, features):
  inputs = features["inputs"]
  train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN

  (encoder_outputs, final_encoder_state, encoder_decoder_attention_bias,
   inputs_length) = self.encode(inputs, self._hparams)

  if "targets_actions" in features:
    targets = features["targets_actions"]
  else:
    tf.logging.warn(
        "CopySeq2Seq must be used with a SemanticParsing problem with a "
        "ShiftReduceGrammar; bad things will happen otherwise")
    targets = features["targets"]

  # LSTM decoder with attention.
  shifted_targets = common_layers.shift_right(targets)
  # Add 1 to account for the padding added to the left from shift_right.
  targets_length = common_layers.length_from_embedding(shifted_targets) + 1
  shifted_targets = common_layers.flatten4d3d(shifted_targets)

  hparams_decoder = copy.copy(self._hparams)
  hparams_decoder.hidden_size = 2 * self._hparams.hidden_size

  decoder_output = lstm_attention_decoder(
      shifted_targets, hparams_decoder, train, "decoder", final_encoder_state,
      encoder_outputs, inputs_length, targets_length)
  decoder_output = tf.expand_dims(decoder_output, axis=2)

  body_output = dict()
  target_modality = (self._problem_hparams.target_modality
                     if self._problem_hparams else {"targets": None})

  assert self._hparams.pointer_layer in ("attentive", "decaying_attentive")

  for key, modality in target_modality.items():
    if isinstance(modality, CopyModality):
      with tf.variable_scope("copy_layer/" + key):
        if self._hparams.pointer_layer == "decaying_attentive":
          output_layer = DecayingAttentivePointerLayer(encoder_outputs)
        else:
          output_layer = AttentivePointerLayer(encoder_outputs)
        scores = output_layer(decoder_output)
        scores += encoder_decoder_attention_bias
        body_output[key] = scores
    else:
      body_output[key] = decoder_output

  return body_output
def transformer_prepare_decoder(targets, hparams, features=None):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters
    features: optionally pass the entire features dictionary as well. This is
      needed now for "packed" datasets.

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a bias tensor for use in decoder self-attention
  """
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(
          common_layers.shape_list(targets)[1]))
  if features and "targets_segmentation" in features:
    # "Packed" dataset - keep the examples from seeing each other.
    targets_segmentation = features["targets_segmentation"]
    targets_position = features["targets_position"]
    decoder_self_attention_bias += common_attention.attention_bias_same_segment(
        targets_segmentation, targets_segmentation)
  else:
    targets_position = None
  if hparams.proximity_bias:
    decoder_self_attention_bias += common_attention.attention_bias_proximal(
        common_layers.shape_list(targets)[1])
  decoder_input = common_layers.shift_right_3d(targets)
  # if hparams.pos == "timing":
  #   if targets_position is not None:
  #     decoder_input = common_attention.add_timing_signal_1d_given_position(
  #         decoder_input, targets_position)
  #   else:
  #     decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  raw_decoder_input = common_layers.shift_right(features["targets_raw"])
  terminal_decoder_bias, nonterminal_decoder_bias = _get_t_nt_bias(
      raw_decoder_input, hparams, decoder_self_attention_bias)
  pop_decoder_bias = _get_pop_bias(raw_decoder_input, hparams)
  raw_decoder_input = tf.squeeze(raw_decoder_input, axis=[-2, -1])
  pos_signals = generate_positional_signals(
      raw_decoder_input, hparams, terminal_decoder_bias,
      nonterminal_decoder_bias)
  pos_embeddings = generate_positional_embeddings(
      pos_signals, hparams.decoder_pos, hparams)
  if "sum" in hparams.decoder_pos_integration:
    decoder_input = decoder_input + pos_embeddings
  elif "ffn" in hparams.decoder_pos_integration:
    with tf.variable_scope("decoder_pos_ffn"):
      decoder_input = tf.concat([decoder_input, pos_embeddings], axis=2)
      decoder_input = transformer_ffn_layer(
          decoder_input, hparams, conv_padding="LEFT")
  return (decoder_input, decoder_self_attention_bias, terminal_decoder_bias,
          nonterminal_decoder_bias, pop_decoder_bias, pos_signals)
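# transformer_prepare_decoder above shifts the 3-D [batch, length, hidden]
# targets with common_layers.shift_right_3d before adding positional
# information. A minimal sketch of that step in isolation (assumes
# tensor2tensor is installed; the toy shapes are illustrative only):
import tensorflow as tf
from tensor2tensor.layers import common_layers

example_targets_3d = tf.random.uniform([2, 6, 16])  # [batch, length, hidden]
example_decoder_input = common_layers.shift_right_3d(example_targets_3d)
# example_decoder_input[:, 0, :] is all zeros, and for t >= 1
# example_decoder_input[:, t, :] == example_targets_3d[:, t - 1, :].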
def model_fn_body(self, features):
  if self._hparams.initializer == "orthogonal":
    raise ValueError("LSTM models fail with orthogonal initializer.")
  train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN
  with tf.variable_scope("lstm_lm"):
    # Flatten and shift inputs.
    shifted_targets = common_layers.shift_right(features.get("targets"))
    inputs = common_layers.flatten4d3d(shifted_targets)
    outputs, _ = lstm.lstm(inputs, self._hparams, train, "lstm")
    return tf.expand_dims(outputs, axis=2)
def decode(cond_vec, cond_add, gold, c, ed, hparams):
  """Transformer decoder."""
  drop_gold = tf.nn.dropout(gold, 1.0 - hparams.layer_prepostprocess_dropout)
  decoder_input = common_layers.shift_right(drop_gold, pad_value=cond_vec)
  if cond_add is not None:
    decoder_input += cond_add
  decoder_input = tf.squeeze(decoder_input, axis=2)
  decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  bias = common_attention.attention_bias_lower_triangle(tf.shape(gold)[1])
  if c is not None and len(c.get_shape()) > 3:
    c = tf.squeeze(c, axis=2)
  return transformer.transformer_decoder(decoder_input, c, bias, ed, hparams)
def slicenet_middle(inputs_encoded, targets, target_space_emb, mask, hparams):
  """Middle part of slicenet, connecting encoder and decoder."""

  def norm_fn(x, name):
    with tf.variable_scope(name, default_name="norm"):
      return common_layers.apply_norm(x, hparams.norm_type,
                                      hparams.hidden_size,
                                      hparams.norm_epsilon)

  # Flatten targets and embed target_space_id.
  targets_flat = tf.expand_dims(common_layers.flatten4d3d(targets), axis=2)
  target_space_emb = tf.tile(target_space_emb,
                             [tf.shape(targets_flat)[0], 1, 1, 1])

  # Calculate similarity loss (but don't run if not needed).
  if len(hparams.problems) > 1 and hparams.sim_loss_mult > 0.00001:
    targets_timed = common_layers.add_timing_signal(targets_flat)
    extra_layers = int(hparams.num_hidden_layers * 1.5)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      targets_encoded = multi_conv_res(targets_timed, "SAME", "encoder",
                                       extra_layers, hparams)
    with tf.variable_scope("similarity_loss"):
      similarity_loss = similarity_cost(inputs_encoded, targets_encoded)
      similarity_loss *= hparams.sim_loss_mult
  else:
    similarity_loss = 0.0

  # Use attention from each target to look at input and retrieve.
  targets_shifted = common_layers.shift_right(
      targets_flat, pad_value=target_space_emb)
  if hparams.attention_type == "none":
    targets_with_attention = tf.zeros_like(targets_shifted)
  else:
    inputs_padding_bias = (1.0 - mask) * -1e9  # Bias to not attend to padding.
    targets_with_attention = attention(
        targets_shifted, inputs_encoded, norm_fn, hparams,
        bias=inputs_padding_bias)

  # Positional targets: merge attention and raw.
  kernel = (hparams.kernel_height, hparams.kernel_width)
  targets_merged = common_layers.subseparable_conv_block(
      tf.concat([targets_with_attention, targets_shifted], axis=3),
      hparams.hidden_size, [((1, 1), kernel)],
      normalizer_fn=norm_fn,
      padding="LEFT",
      separability=4,
      name="targets_merge")

  return targets_merged, similarity_loss
def lstm_seq2seq_internal_attention(inputs, targets, hparams, train):
  """LSTM seq2seq model with attention, main step used for training."""
  with tf.variable_scope("lstm_seq2seq_attention"):
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)
    # LSTM encoder.
    encoder_outputs, final_encoder_state = lstm(
        tf.reverse(inputs, axis=[1]), hparams, train, "encoder")
    # LSTM decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    decoder_outputs, _ = lstm_attention_decoder(
        common_layers.flatten4d3d(shifted_targets), hparams, train, "decoder",
        final_encoder_state, encoder_outputs)
    return tf.expand_dims(decoder_outputs, axis=2)
def _build_lm_inputs(self, features):
  """Builds inputs and targets for LM training."""
  targets = features["targets"]
  target_tags = features["target_tags"]

  if self._hparams.mode == tf.estimator.ModeKeys.PREDICT:
    target_tags = tf.tile(target_tags, [self._hparams.beam_width, 1, 1, 1])

  # Construct LM inputs.
  inputs = common_layers.shift_right(targets, pad_value=target_tags)
  inputs_length = common_layers.length_from_embedding(targets) + 1
  inputs = common_layers.flatten4d3d(inputs)

  return inputs, inputs_length
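# _build_lm_inputs above uses the pad_value argument of shift_right: instead of
# an all-zero GO frame, the target-language tag embedding is prepended and the
# last frame is dropped. A minimal, self-contained sketch of that call (assumes
# tensor2tensor is installed; tensor names and shapes are illustrative only):
import tensorflow as tf
from tensor2tensor.layers import common_layers

example_lm_targets = tf.random.uniform([2, 5, 1, 8])  # [batch, length, 1, emb]
example_target_tags = tf.random.uniform([2, 1, 1, 8])  # one tag frame per example
example_lm_inputs = common_layers.shift_right(
    example_lm_targets, pad_value=example_target_tags)
# example_lm_inputs[:, 0] equals the tag frame;
# example_lm_inputs[:, 1:] equals example_lm_targets[:, :-1].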
def lstm_seq2seq_internal(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model, main step used for training."""
  with tf.variable_scope("lstm_seq2seq"):
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)
    # LSTM encoder.
    _, final_encoder_state = lstm(
        tf.reverse(inputs, axis=[1]), hparams, train, "encoder")
    # LSTM decoder.
    shifted_targets = common_layers.shift_right(targets)
    decoder_outputs, _ = lstm(
        common_layers.flatten4d3d(shifted_targets),
        hparams,
        train,
        "decoder",
        initial_state=final_encoder_state)
    return tf.expand_dims(decoder_outputs, axis=2)
def create_model_encode_decode(self, inputs, y_id):
  # inputs: [batch, step, 1, hid]; y_id: [batch, step, 1, 1].
  hparams = self.hparams
  train_flag = self.train_flag
  vocab_size = self.vocabsz
  embeddings_y = self.embeddings_y

  with tf.variable_scope("foo", reuse=tf.AUTO_REUSE):
    # Embed y.
    y = tf.nn.embedding_lookup(embeddings_y, y_id)
    y = tf.squeeze(y, axis=3)  # [batch, step, 1, hid]

    if len(inputs.shape) == 2:  # [batch, hid]
      inputs = tf.expand_dims(tf.expand_dims(inputs, axis=1), axis=1)
    inputs_length = common_layers.length_from_embedding(
        inputs)  # inputs: [batch, step, 1, hid]

    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)

    # LSTM encoder.
    inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
    _, final_encoder_state = lstm_yr(
        inputs, inputs_length, hparams, train_flag,
        "encoder")  # final_encoder_state must be an LSTMStateTuple.

    # LSTM decoder.
    # Note: embedding may already be handled by the problem definition.
    shifted_targets = common_layers.shift_right(
        y)  # e.g. [46, 23, 78] -> [0, 46, 23] | [batch, step, 1, hid]
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    decoder_outputs, _ = lstm_yr(
        common_layers.flatten4d3d(shifted_targets),
        targets_length,
        hparams,
        train_flag,
        "decoder",
        initial_state=final_encoder_state)  # decoder output: [batch, step, hid]

    decoder_outputs = tf.layers.dense(
        inputs=decoder_outputs, units=vocab_size)  # -> [batch, step, vocabsz]
    decoder_outputs = self.tensor3dto4d(decoder_outputs)
    return decoder_outputs
def gru_seq2seq_internal_attention_bid_encoder(inputs, targets, hparams,
                                               train):
  """GRU seq2seq model with attention, main step used for training."""
  with tf.variable_scope("gru_seq2seq_attention_bid_encoder"):
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)
    # GRU encoder.
    encoder_outputs, final_encoder_state = gru_bid_encoder(
        tf.reverse(inputs, axis=[1]), hparams, train, "encoder")
    # GRU decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    hparams_decoder = copy.copy(hparams)
    hparams_decoder.hidden_size = 2 * hparams.hidden_size
    decoder_outputs, _ = gru_attention_decoder(
        common_layers.flatten4d3d(shifted_targets), hparams_decoder, train,
        "decoder", final_encoder_state, encoder_outputs)
    return tf.expand_dims(decoder_outputs, axis=2)
def lstm_seq2seq_internal_attention(inputs, targets, hparams, train):
  """LSTM seq2seq model with attention, main step used for training."""
  with tf.variable_scope("lstm_seq2seq_attention"):
    # This is a temporary fix for varying-length sequences within a batch.
    # A more complete fix should pass a length tensor from outside so that
    # all the lstm variants can use it.
    lengths = tf.reduce_sum(common_layers.mask_from_embedding(inputs),
                            [1, 2, 3])
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)
    # LSTM encoder.
    encoder_outputs, final_encoder_state = lstm(
        inputs, hparams, train, "encoder", lengths=lengths)
    # LSTM decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    decoder_outputs, _ = lstm_attention_decoder(
        common_layers.flatten4d3d(shifted_targets), hparams, train, "decoder",
        final_encoder_state, encoder_outputs, lengths=lengths)
    return tf.expand_dims(decoder_outputs, axis=2)
def lstm_seq2seq_internal_static(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model, main step used for training."""
  with tf.variable_scope("lstm_seq2seq"):
    if inputs is not None:
      # Flatten inputs.
      inputs = tf.reverse(common_layers.flatten4d3d(inputs), axis=[1])
      # Construct static rnn input list.
      # TODO: the length should be a parameter.
      input_list = [inputs[:, i, :] for i in range(21)]
      # LSTM encoder.
      _, final_encoder_state = lstm(input_list, hparams, train, "encoder")
      input_list.clear()
    else:
      final_encoder_state = None

    # LSTM decoder.
    # Get a list of tensors.
    shifted_trg = common_layers.flatten4d3d(common_layers.shift_right(targets))
    target_list = [shifted_trg[:, i, :] for i in range(21)]
    decoder_outputs, _ = lstm(target_list, hparams, train, "decoder",
                              initial_state=final_encoder_state)
    target_list.clear()

    # Convert decoder outputs to a tensor.
    tensors = tf.transpose(tf.convert_to_tensor(decoder_outputs),
                           perm=[1, 0, 2])
    decoder_outputs.clear()

    # Project the outputs.
    with tf.variable_scope("projection"):
      projected_outputs = tf.layers.dense(
          tensors, 2048, activation=None, use_bias=False)
    return tf.expand_dims(projected_outputs, axis=2)
def lstm_seq2seq_internal_attention(inputs, targets, hparams, train,
                                    inputs_length, targets_length):
  """LSTM seq2seq model with attention, main step used for training."""
  with tf.variable_scope("lstm_seq2seq_attention"):
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)

    # LSTM encoder.
    inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
    encoder_outputs, final_encoder_state = lstm(
        inputs, inputs_length, hparams, train, "encoder")

    # LSTM decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = targets_length + 1
    decoder_outputs = lstm_attention_decoder(
        common_layers.flatten4d3d(shifted_targets), hparams, train, "decoder",
        final_encoder_state, encoder_outputs, inputs_length, targets_length)
    return tf.expand_dims(decoder_outputs, axis=2)
def lstm_seq2seq_internal(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model, main step used for training."""
  with tf.variable_scope("lstm_seq2seq"):
    if inputs is not None:
      # Flatten inputs.
      inputs = common_layers.flatten4d3d(inputs)
      # LSTM encoder.
      _, final_encoder_state = lstm(
          tf.reverse(inputs, axis=[1]), hparams, train, "encoder")
    else:
      final_encoder_state = None
    # LSTM decoder.
    shifted_targets = common_layers.shift_right(targets)
    decoder_outputs, _ = lstm(
        common_layers.flatten4d3d(shifted_targets),
        hparams,
        train,
        "decoder",
        initial_state=final_encoder_state)
    return tf.expand_dims(decoder_outputs, axis=2)
def lstm_seq2seq_search_based_attention(inputs, targets, hparams, train,
                                        build_storage, storage, n):
  """LSTM seq2seq search-based model with attention."""
  with tf.variable_scope("lstm_seq2seq_attention", reuse=tf.AUTO_REUSE):
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)
    # LSTM encoder.
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hparams.hidden_size)
    encoder_outputs, final_encoder_state = rnn(
        tf.reverse(inputs, axis=[1]), lstm_cell, hparams, train, "encoder")
    # LSTM decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    decoder_outputs, p_copy = lstm_attention_search_based_decoder(
        common_layers.flatten4d3d(shifted_targets), hparams, train, "decoder",
        final_encoder_state, encoder_outputs, build_storage, storage, n)

    if build_storage:
      return tf.expand_dims(decoder_outputs, axis=2)
    else:
      return tf.expand_dims(decoder_outputs, axis=2), p_copy
def lstm_seq2seq_internal_bid_encoder(inputs, targets, hparams, train):
  """The basic LSTM seq2seq model with bidirectional encoder."""
  with tf.variable_scope("lstm_seq2seq_bid_encoder"):
    if inputs is not None:
      # Flatten inputs.
      inputs = common_layers.flatten4d3d(inputs)
      # LSTM encoder.
      _, final_encoder_state = lstm_bid_encoder(
          tf.reverse(inputs, axis=[1]), hparams, train, "encoder")
    else:
      final_encoder_state = None
    # LSTM decoder.
    shifted_targets = common_layers.shift_right(targets)
    hparams_decoder = copy.copy(hparams)
    hparams_decoder.hidden_size = 2 * hparams.hidden_size
    decoder_outputs, _ = lstm(
        common_layers.flatten4d3d(shifted_targets), hparams_decoder, train,
        "decoder", initial_state=final_encoder_state)
    return tf.expand_dims(decoder_outputs, axis=2)
def lstm_seq2seq_internal_attention_bid_encoder(inputs, targets, hparams,
                                                train):
  """LSTM seq2seq model with attention, main step used for training."""
  with tf.variable_scope("lstm_seq2seq_attention_bid_encoder"):
    inputs_length = common_layers.length_from_embedding(inputs)
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)
    # LSTM encoder.
    encoder_outputs, final_encoder_state = lstm_bid_encoder(
        inputs, inputs_length, hparams, train, "encoder")
    # LSTM decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    hparams_decoder = copy.copy(hparams)
    hparams_decoder.hidden_size = 2 * hparams.hidden_size
    decoder_outputs = lstm_attention_decoder(
        common_layers.flatten4d3d(shifted_targets), hparams_decoder, train,
        "decoder", final_encoder_state, encoder_outputs, inputs_length,
        targets_length)
    return tf.expand_dims(decoder_outputs, axis=2)
def slicenet_middle(inputs_encoded, targets, target_space_emb, mask, hparams):
  """Middle part of slicenet, connecting encoder and decoder."""

  def norm_fn(x, name):
    with tf.variable_scope(name, default_name="norm"):
      return common_layers.apply_norm(x, hparams.norm_type, hparams.model_d,
                                      hparams.norm_epsilon)

  # Flatten targets and embed target_space_id.
  targets_flat = tf.expand_dims(common_layers.flatten4d3d(targets), axis=2)
  target_space_emb = tf.tile(target_space_emb,
                             [tf.shape(targets_flat)[0], 1, 1, 1])

  # Use attention from each target to look at input and retrieve.
  targets_shifted = common_layers.shift_right(
      targets_flat, pad_value=target_space_emb)
  if hparams.attention_type == "none":
    targets_with_attention = tf.zeros_like(targets_shifted)
  else:
    inputs_padding_bias = (1.0 - mask) * -1e9  # Bias to not attend to padding.
    targets_with_attention = attention(
        targets_shifted, inputs_encoded, norm_fn, hparams,
        bias=inputs_padding_bias)

  # Positional targets: merge attention and raw.
  kernel = (hparams.kernel_height, hparams.kernel_width)
  targets_merged = common_layers.subseparable_conv_block(
      tf.concat([targets_with_attention, targets_shifted], axis=3),
      hparams.model_d, [((1, 1), kernel)],
      normalizer_fn=norm_fn,
      padding="LEFT",
      separability=4,
      name="targets_merge")

  return targets_merged, 0.0
def lstm_seq2seq_internal_attention(inputs, targets, hparams, train):
  """LSTM seq2seq model with attention, main step used for training."""
  with tf.variable_scope("lstm_seq2seq_attention"):
    # This is a temporary fix for varying-length sequences within a batch.
    # A more complete fix should pass a length tensor from outside so that
    # all the lstm variants can use it.
    inputs_length = common_layers.length_from_embedding(inputs)
    # Flatten inputs.
    inputs = common_layers.flatten4d3d(inputs)

    # LSTM encoder.
    inputs = tf.reverse_sequence(inputs, inputs_length, seq_axis=1)
    encoder_outputs, final_encoder_state = lstm(
        inputs, inputs_length, hparams, train, "encoder")

    # LSTM decoder with attention.
    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1
    decoder_outputs = lstm_attention_decoder(
        common_layers.flatten4d3d(shifted_targets), hparams, train, "decoder",
        final_encoder_state, encoder_outputs, inputs_length, targets_length)
    return tf.expand_dims(decoder_outputs, axis=2)
def render2cmd_v3_internal(self, features, hparams, train):
  # inputs and targets are both sequences with
  # shape = [batch, seq_len, 1, hparams.problem.feature_dim]
  all_targets = features["targets"]
  all_targets_cls = features["targets_cls"]
  all_targets_font_cls = features["targets_fnt"]
  all_targets_psr = features["targets_psr"]

  all_batch_size = common_layers.shape_list(all_targets)[0]
  batch_size = all_batch_size // 2

  sources = all_targets[:batch_size, ...]
  sources_cls = all_targets_cls[:batch_size, ...]
  sources_fnt = all_targets_font_cls[:batch_size, ...]
  sources_psr = all_targets_psr[:batch_size, ...]

  targets = all_targets[batch_size:, ...]
  targets_cls = all_targets_cls[batch_size:, ...]
  targets_fnt = all_targets_font_cls[batch_size:, ...]
  targets_psr = all_targets_psr[batch_size:, ...]

  losses = {}

  # Alternative bottlenecks that were tried here:
  #   sampled_bottleneck = self.pretrained_visual_encoder(features, hparams)
  #   if hparams.sg_bottleneck:
  #     sampled_bottleneck = tf.stop_gradient(sampled_bottleneck)
  #   embd = self.cls_embedding(sources_cls, sources_fnt, targets_cls,
  #                             targets_fnt)
  vis_embd = self.vis_encoder(sources_psr, targets_psr, targets_cls)
  sampled_bottleneck = vis_embd

  with tf.variable_scope("render2cmd_v3_internal"):
    # Finalize bottleneck: twice the hidden size because an LSTM state is used.
    unbottleneck_dim = hparams.hidden_size * 2
    if hparams.twice_decoder:
      unbottleneck_dim = unbottleneck_dim * 2

    dec_initial_state = []

    # LSTM encoder.
    _, encoder_output_states = self.lstm_encoder(
        common_layers.flatten4d3d(sources), hparams)

    for hi in range(hparams.num_hidden_layers):
      unbottleneck = self.unbottleneck(
          sampled_bottleneck, unbottleneck_dim, name_append="_{}".format(hi))
      c, h = encoder_output_states[hi]
      dec_initial_state.append(
          tf.nn.rnn_cell.LSTMStateTuple(
              c=tf.concat([unbottleneck[:, :unbottleneck_dim // 2], c], 1),
              h=tf.concat([unbottleneck[:, unbottleneck_dim // 2:], h], 1)))
    dec_initial_state = tuple(dec_initial_state)

    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1

    # LSTM decoder.
    hparams_decoder = copy.copy(hparams)
    if hparams.twice_decoder:
      hparams_decoder.hidden_size = 2 * hparams.hidden_size

    if hparams.mode == tf.estimator.ModeKeys.PREDICT:
      decoder_outputs, _ = self.lstm_decoder_infer(
          common_layers.flatten4d3d(shifted_targets),
          targets_length,
          hparams_decoder,
          targets_cls,
          train,
          initial_state=dec_initial_state,
          bottleneck=sampled_bottleneck)
    else:
      decoder_outputs, _ = self.lstm_decoder(
          common_layers.flatten4d3d(shifted_targets),
          targets_length,
          hparams_decoder,
          targets_cls,
          train,
          initial_state=dec_initial_state,
          bottleneck=sampled_bottleneck)

    ret = tf.expand_dims(decoder_outputs, axis=2)

  return ret, losses
def render2cmd_v3_internal(self, features, hparams, train):
  # inputs and targets are both sequences with
  # shape = [batch, seq_len, 1, hparams.problem.feature_dim]
  targets = features["targets"]
  losses = {}

  sampled_bottleneck = self.pretrained_visual_encoder(features, hparams)
  if hparams.sg_bottleneck:
    sampled_bottleneck = tf.stop_gradient(sampled_bottleneck)

  with tf.variable_scope("render2cmd_v3_internal"):
    # Override bottleneck, or return it, if requested.
    if "bottleneck" in features:
      if common_layers.shape_list(features["bottleneck"])[0] == 0:
        # Return the sampled bottleneck; set losses["training"] = 0 so
        # self.top() doesn't get called on it.
        return sampled_bottleneck, {"training": 0.0}
      else:
        # We want to use the given bottleneck.
        sampled_bottleneck = features["bottleneck"]

    # Finalize bottleneck: twice the hidden size because an LSTM state is used.
    unbottleneck_dim = hparams.hidden_size * 2
    if hparams.twice_decoder:
      unbottleneck_dim = unbottleneck_dim * 2

    # Unbottleneck back to LSTMStateTuple.
    dec_initial_state = []
    for hi in range(hparams.num_hidden_layers):
      unbottleneck = self.unbottleneck(
          sampled_bottleneck, unbottleneck_dim, name_append="_{}".format(hi))
      dec_initial_state.append(
          rnn.LSTMStateTuple(
              c=unbottleneck[:, :unbottleneck_dim // 2],
              h=unbottleneck[:, unbottleneck_dim // 2:]))
    dec_initial_state = tuple(dec_initial_state)

    shifted_targets = common_layers.shift_right(targets)
    # Add 1 to account for the padding added to the left from shift_right.
    targets_length = common_layers.length_from_embedding(shifted_targets) + 1

    # LSTM decoder.
    hparams_decoder = copy.copy(hparams)
    if hparams.twice_decoder:
      hparams_decoder.hidden_size = 2 * hparams.hidden_size

    if hparams.mode == tf.estimator.ModeKeys.PREDICT:
      decoder_outputs, _ = self.lstm_decoder_infer(
          common_layers.flatten4d3d(shifted_targets),
          targets_length,
          hparams_decoder,
          features["targets_cls"],
          train,
          initial_state=dec_initial_state,
          bottleneck=sampled_bottleneck)
    else:
      decoder_outputs, _ = self.lstm_decoder(
          common_layers.flatten4d3d(shifted_targets),
          targets_length,
          hparams_decoder,
          features["targets_cls"],
          train,
          initial_state=dec_initial_state,
          bottleneck=sampled_bottleneck)

    ret = tf.expand_dims(decoder_outputs, axis=2)

  return ret, losses