def __init__(self, cell, location_softmax, pointing_output, input_size,
             decoder_inputs=None, trainable=True, name=None, **kwargs):
    """Initializes a new PointingSoftmaxDecoder instance.

    See the class documentation for the description of all the arguments.
    """
    super(PointingSoftmaxDecoder, self).__init__(
        trainable=trainable, name=name, **kwargs)
    self._cell = cell
    self._loc = location_softmax
    self._out = pointing_output
    self._inp_size = input_size

    # When decoder inputs are provided, swap their first two axes
    # (presumably batch-major -> time-major, to be confirmed against the
    # class documentation) and unstack them along the new leading axis
    # into a `TensorArray`. Otherwise no inputs are stored.
    inputs_ta = None
    if decoder_inputs is not None:
        swapped = tf.transpose(decoder_inputs, [1, 0, 2])
        inputs_ta = tf.TensorArray(
            dtype=swapped.dtype,
            size=tf.shape(swapped)[0],
            element_shape=swapped.get_shape()[1:]).unstack(swapped)
    self._inputs_ta = inputs_ta

    # The batch and location sizes are read from the `states` tensor of
    # the attention layer held by the injected location softmax.
    attention_states = self._loc.attention.states
    self._batch_size = utils.get_dimension(attention_states, 0)
    self._loc_size = utils.get_dimension(attention_states, 1)
def test_zero_output(self):
    """Test the .zero_output() method.

    The decoder is built around mocked collaborators. The test checks
    that `zero_output()` delegates exactly once to the pointing output,
    that the returned tensor is the very tensor produced by the
    delegate, and that the two positional arguments passed to the
    delegate evaluate to the batch size and the number of timesteps of
    the fed attention states.
    """
    batch_size = 2
    timesteps = 5
    shortlist_size = 3
    output_size = shortlist_size + timesteps
    state_size = 9
    input_size = 5

    cell = mock.Mock()

    # Fully dynamic attention states: every dimension is only known
    # when the placeholder is fed.
    states = tf.placeholder(tf.float32, shape=[None, None, None])
    location_softmax = mock.Mock()
    location_softmax.attention.states = states

    batch_size_t = utils.get_dimension(states, 0)
    timesteps_t = utils.get_dimension(states, 1)
    output_size_t = shortlist_size + timesteps_t
    zero_output_exp_shape = tf.stack([batch_size_t, output_size_t])
    zero_output_exp_t = tf.zeros(zero_output_exp_shape)

    pointing_output = mock.Mock()
    pointing_output.zero_output.side_effect = [zero_output_exp_t]

    decoder = layers.PointingSoftmaxDecoder(
        cell=cell,
        location_softmax=location_softmax,
        pointing_output=pointing_output,
        input_size=input_size)
    zero_output_act_t = decoder.zero_output()

    # Verify the delegation happened exactly once BEFORE reading
    # `call_args`: if the mock was never called `call_args` is None and
    # indexing it would raise a TypeError instead of a readable
    # assertion failure. An explicit `call_count` check is used because
    # `assert_called_once()` is not a real assertion on older `mock`
    # releases, where it silently succeeds.
    self.assertEqual(1, pointing_output.zero_output.call_count)
    batch_size_act_t, loc_size_act_t = tuple(
        pointing_output.zero_output.call_args[0])
    self.assertEqual(zero_output_exp_t, zero_output_act_t)

    zero_output_exp = np.zeros((batch_size, output_size))
    feed = {states: np.random.rand(batch_size, timesteps, state_size)}  # pylint: disable=E1101,I0011
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        zero_output_act = sess.run(zero_output_act_t, feed)
        batch_size_act, loc_size_act = sess.run(
            [batch_size_act_t, loc_size_act_t], feed)

    self.assertAllEqual(zero_output_exp, zero_output_act)
    self.assertEqual(batch_size, batch_size_act)
    self.assertEqual(timesteps, loc_size_act)
def finished(self, time, output):
    """Check which sentences are finished.

    Arguments:
      time: a `Tensor` of rank `0D` (i.e. a scalar) with the 0-based value
        of the current step in the loop.
      output: a `Tensor` of rank `2D` and shape `[batch_size, num_classes]`
        representing the current output of the model, i.e. a batch of
        probability distribution estimations over the output classes.

    Returns:
      a `Tensor` of shape `[batch_size]` of `tf.bool` elements, indicating
      for each position if the corresponding sequence has terminated or
      not. A sequence has terminated if the number of steps performed so
      far (`time + 1`) is greater than or equal to the number of steps
      allowed (the `lengths` given at construction time) or, when an `EOS`
      symbol has been provided, if the `argmax` over the output
      probability distribution equals the `EOS` id.
    """
    done = tf.greater_equal(time + 1, self._lengths)

    # A scalar flag (scalar `lengths`) is replicated once per batch item.
    if done.get_shape().ndims == 0:
        multiples = [utils.get_dimension(output, 0)]
        done = tf.tile([done], multiples)

    if self._EOS is None:
        return done

    predicted_ids = tf.cast(tf.argmax(output, axis=-1), tf.int32)
    hit_eos = tf.equal(predicted_ids, self._EOS)
    return tf.logical_or(done, hit_eos)
def test_dimension_out_of_bound(self):
    """A dimension index outside the tensor rank raises an IndexError.

    For a rank-2 tensor the valid indexes are -2, -1, 0 and 1: each of
    them must resolve to a dimension, while the first index past either
    end (2 and -3) must raise.
    """
    tensor = tf.placeholder(dtype=tf.float32, shape=[9, 23])
    rank = len(tensor.shape)

    # `range(-rank, rank)` covers every valid index, including the last
    # positive one (`rank - 1`), which `range(-rank, rank - 1)` skipped.
    for i in range(-rank, rank):
        self.assertIsNotNone(utils.get_dimension(tensor, i))

    self.assertRaises(IndexError, utils.get_dimension, tensor, rank)
    self.assertRaises(IndexError, utils.get_dimension, tensor, -rank - 1)
def test_unspecified_shape(self):
    """Test with unspecified tensor shape.

    With a placeholder of unknown shape every dimension has to be
    evaluated in a session; asking for a dimension equal to the rank of
    the fed data is expected to fail at run time.
    """
    expected_shape = (9, 2, 3)
    rank = len(expected_shape)
    tensor = tf.placeholder(dtype=tf.float32)
    feed = {tensor: np.ones(expected_shape)}

    dims = [utils.get_dimension(tensor, axis) for axis in range(rank)]
    with tf.Session() as sess:
        actual_dims = sess.run(dims, feed)
    for actual, expected in zip(actual_dims, expected_shape):
        self.assertEqual(actual, expected)

    invalid_dim = utils.get_dimension(tensor, rank)
    with tf.Session() as sess:
        with self.assertRaises(tf.errors.InvalidArgumentError):
            sess.run(invalid_dim, feed)
def _call_helper(self, query):  # pylint: disable=I0011,W0221
    """Compute the location distribution and the attention context.

    Arguments:
      query: the value forwarded as-is to the wrapped attention layer.

    Returns:
      a tuple `(location, context)` where `location` is the result of
      `ops.softmax` over the attention activations (masked with the
      sequence lengths, when they are available) and `context` is the sum
      over axis 1 of the attention states weighted by `location`.
    """
    scores = self._attention(query)
    maxlen = utils.get_dimension(scores, -1)

    # Without sequence lengths no mask is passed to the softmax.
    mask = None
    if self._sequence_length is not None:
        bool_mask = tf.sequence_mask(self._sequence_length, maxlen)
        mask = tf.cast(bool_mask, tf.float32)

    location = ops.softmax(scores, mask)

    # Add a trailing axis so the weights broadcast against the states.
    expanded = tf.expand_dims(location, axis=2)
    context = tf.reduce_sum(self._attention.states * expanded, axis=1)
    return location, context
def test_default(self):
    """Basic test for the `liteflow.utils.get_dimension`."""
    expected_shape = (9, 2, 3)
    tensor = tf.placeholder(
        dtype=tf.float32,
        shape=[None, expected_shape[1], expected_shape[2]])
    data = np.ones(expected_shape)

    dim_0 = utils.get_dimension(tensor, 0)
    dim_1 = utils.get_dimension(tensor, 1, ensure_tensor=True)
    dim_2 = utils.get_dimension(tensor, 2)

    # dim_0 (unknown at graph time) and dim_1 (forced through
    # `ensure_tensor`) are tensors and must be evaluated.
    with tf.Session() as sess:
        act_dim_0, act_dim_1 = sess.run([dim_0, dim_1], {tensor: data})
    self.assertEqual(expected_shape[0], act_dim_0)
    self.assertEqual(expected_shape[1], act_dim_1)

    # dim_2 is statically known: a plain integer with the expected value.
    self.assertEqual(int, type(dim_2))
    self.assertEqual(expected_shape[2], dim_2)
def _call_helper(self):
    """Run the decoding loop.

    The loop variables are, in order: the time step, the current input,
    the current state, the per-item `finished` flags and the
    `TensorArray` collecting the outputs.

    Returns:
      a tuple `(output, state)` where `output` is the stacked output
      `TensorArray` with its first two axes swapped and `state` is the
      state loop variable at the end of the loop.
    """
    time = tf.constant(0, dtype=tf.int32)
    inp = self._decoder.init_input()
    state = self._decoder.init_state()

    # No item is finished at the beginning; one flag per row of `inp`.
    batch = utils.get_dimension(inp, 0)
    finished = tf.tile([False], [batch])
    output_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)

    _, _, final_state, _, final_output_ta = tf.while_loop(
        cond=self.cond,
        body=self.body,
        loop_vars=[time, inp, state, finished, output_ta],
        parallel_iterations=self._parallel_iterations,
        swap_memory=self._swap_memory)

    stacked = final_output_ta.stack()
    return tf.transpose(stacked, [1, 0, 2]), final_state
def test_iterations(self):
    """Test the number of iterations.

    The mocked helper marks a sequence as finished once `time + 1`
    reaches its length (1, 2 and 3 respectively), while the mocked
    decoder never reports termination on its own.
    """
    lengths = tf.constant([1, 2, 3], dtype=tf.int32)

    def _helper_finished(time, _):
        return tf.greater_equal(time + 1, lengths)

    helper = mock.Mock()
    helper.finished.side_effect = _helper_finished

    batch_size = utils.get_dimension(lengths, 0)
    inp_size, state_size, output_size = 2, 5, 2

    def _init_input():
        return tf.zeros([batch_size, inp_size])

    def _init_state():
        return tf.ones([batch_size, state_size])

    def _zero_output():
        return tf.zeros([batch_size, output_size])

    def _step(t, i, s):  # pylint: disable=C0103,W0613,I0011
        # Returned in order: output, next input, next state, finished.
        return ((i + 1), 3 * (i + 1), (s + 2),
                tf.tile([False], [batch_size]))

    decoder = mock.Mock()
    decoder.init_input.side_effect = _init_input
    decoder.init_state.side_effect = _init_state
    decoder.zero_output.side_effect = _zero_output
    decoder.step.side_effect = _step

    output_exp = np.asarray(
        [[[1, 1], [0, 0], [0, 0]],
         [[1, 1], [4, 4], [0, 0]],
         [[1, 1], [4, 4], [13, 13]]],
        dtype=np.float32)  # pylint: disable=E1101,I0011
    state_exp = np.asarray(
        [[7, 7, 7, 7, 7],
         [7, 7, 7, 7, 7],
         [7, 7, 7, 7, 7]],
        dtype=np.float32)  # pylint: disable=E1101,I0011

    dyndec = layers.DynamicDecoder(decoder, helper)
    output_t, state_t = dyndec.decode()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        output_act, state_act = sess.run([output_t, state_t])

    self.assertAllEqual(output_exp, output_act)
    self.assertAllEqual(state_exp, state_act)
def zero_attention_context(self, batch_size):
    """A tensor representing the zero attention context.

    Arguments:
      batch_size: the size of the first dimension of the result.

    Returns:
      a `tf.float32` tensor of zeros of shape `[batch_size, state_size]`,
      where `state_size` is dimension 2 of the attention states.
    """
    state_size = utils.get_dimension(self.attention.states, 2)
    zero_shape = tf.stack([batch_size, state_size])
    return tf.zeros(shape=zero_shape, dtype=tf.float32)
def zero_location_softmax(self, batch_size):
    """A tensor representing the zero location softmax.

    Arguments:
      batch_size: the size of the first dimension of the result.

    Returns:
      a `tf.float32` tensor of zeros of shape `[batch_size, location_size]`,
      where `location_size` is dimension 1 of the attention states.
    """
    location_size = utils.get_dimension(self.attention.states, 1)
    zero_shape = tf.stack([batch_size, location_size])
    return tf.zeros(shape=zero_shape, dtype=tf.float32)
def _build_sequences_and_lengths(self):
    """Build a fully dynamic sequences placeholder and its lengths.

    Returns:
      a tuple `(sequences, lengths)` where `sequences` is a rank-2
      `tf.int32` placeholder with unknown dimensions and `lengths` is a
      `tf.int32` vector with one entry per row of `sequences`, each equal
      to the size of dimension 1 of `sequences`.
    """
    sequences = tf.placeholder(dtype=tf.int32, shape=[None, None])
    batch, length = (
        utils.get_dimension(sequences, axis) for axis in (0, 1))
    lengths = length * tf.ones(dtype=tf.int32, shape=[batch])
    return sequences, lengths
def test_iterations_step_by_step(self):
    """Test the number of iterations (step by step).

    Instead of running the whole decoding loop, this test drives
    `DynamicDecoder.body` and `DynamicDecoder.cond` by hand for three
    steps against a mocked decoder and a mocked helper. After each call
    to `body` it checks the returned time counter and `finished` flags;
    before each further step it checks the value of `cond`, which is
    expected to be True before steps 1 and 2 and False after step 2.
    """
    # pylint: disable=C0103,I0011
    T, F = True, False
    # Shorthand building a boolean tensor out of its arguments.
    bt = lambda *args: tf.convert_to_tensor(list(args), dtype=tf.bool)
    # pylint: enable=C0103,I0011

    init_value = [[.1, .1], [.2, .2], [.3, .3]]
    inp = tf.placeholder(tf.float32, shape=[None, None])
    state = 2 * inp
    # NOTE: the mocked `step()` below hands back `inp` and `state`
    # unchanged as next input and next state at every time step.
    zero_output = tf.zeros_like(inp)
    out01 = 10 * inp
    out02 = 20 * inp
    out03 = 30 * inp

    # Mocked decoder: list-valued `side_effect`s yield one item per call,
    # so the order of the calls below matters.
    decoder = mock.Mock()
    decoder.init_input.side_effect = [inp]
    decoder.init_state.side_effect = [state]
    decoder.zero_output.return_value = zero_output
    decoder_finished_00 = bt(F, F, F)
    decoder_finished_01 = bt(F, F, T)
    decoder_finished_02 = bt(F, F, T)
    decoder.step.side_effect = [
        (out01, inp, state, decoder_finished_00),  # time=0
        (out02, inp, state, decoder_finished_01),  # time=1
        (out03, inp, state, decoder_finished_02)
    ]  # time=2

    # Mocked helper: one `finished` vector per time step.
    helper = mock.Mock()
    helper_finished_00 = bt(F, F, F)
    helper_finished_01 = bt(F, T, F)
    helper_finished_02 = bt(T, F, F)
    helper.finished.side_effect = [
        helper_finished_00,  # time=0
        helper_finished_01,  # time=1
        helper_finished_02
    ]  # time=2

    # STEP BY STEP EVALUATION OF `finished` flags.
    dyndec = layers.DynamicDecoder(decoder, helper)
    time = tf.constant(0, dtype=tf.int32)
    finished = tf.tile([F], [utils.get_dimension(inp, 0)])
    output_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)

    # time=0
    # Neither the decoder nor the helper flags anything: nothing finished.
    next_finished_00_exp = [F, F, F]
    results_00 = dyndec.body(time, inp, state, finished, output_ta)
    time = results_00[0]
    finished = results_00[3]
    # The stacked tensor is discarded; presumably this only checks that
    # the returned TensorArray can be stacked -- TODO confirm.
    results_00[-1].stack()
    feed = {inp: init_value}
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        next_finished_00_act = sess.run(finished, feed)
        self.assertEqual(1, sess.run(time, feed))
        self.assertAllEqual(next_finished_00_exp, next_finished_00_act)

    # time=1
    # Not every item is finished, so the loop is expected to continue.
    cond_01_t = dyndec.cond(*results_00)
    cond_01_exp = True
    feed = {inp: init_value}
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        cond_01_act = sess.run(cond_01_t, feed)
        self.assertEqual(cond_01_exp, cond_01_act)

    # Item 1 is flagged by the helper, item 2 by the decoder.
    next_finished_01_exp = [F, T, T]
    # NOTE(review): the initial `output_ta` (not `results_00[-1]`) is
    # passed again here -- confirm this is intended.
    results_01 = dyndec.body(time, inp, state, finished, output_ta)
    time = results_01[0]
    finished = results_01[3]
    results_01[-1].stack()
    feed = {inp: init_value}
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        self.assertEqual(2, sess.run(time, feed))
        next_finished_01_act = sess.run(finished, feed)
        self.assertAllEqual(next_finished_01_exp, next_finished_01_act)

    # time=2
    # Item 0 is still running, so the loop is expected to continue.
    cond_02_t = dyndec.cond(*results_01)
    cond_02_exp = True
    feed = {inp: init_value}
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        cond_02_act = sess.run(cond_02_t, feed)
        self.assertEqual(cond_02_exp, cond_02_act)

    # Item 0 is now flagged by the helper; items 1 and 2 are expected to
    # stay finished from the previous step.
    next_finished_02_exp = [T, T, T]
    results_02 = dyndec.body(time, inp, state, finished, output_ta)
    time = results_02[0]
    finished = results_02[3]
    results_02[-1].stack()
    feed = {inp: init_value}
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        self.assertEqual(3, sess.run(time, feed))
        next_finished_02_act = sess.run(finished, feed)
        self.assertAllEqual(next_finished_02_exp, next_finished_02_act)

    # time=3
    # Every item is finished: the loop condition must now be False.
    cond_03_t = dyndec.cond(*results_02)
    cond_03_exp = False  # STOP!
    feed = {inp: init_value}
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        cond_03_act = sess.run(cond_03_t, feed)
        self.assertEqual(cond_03_exp, cond_03_act)
def test_step_without_decoder_inputs(self):  # pylint: disable=C0103
    """Test the .step() method when decoder inputs are not available.

    This is the inference scenario: no `decoder_inputs` are injected in
    the decoder. The recurrent cell, the location softmax and the
    pointing output are mocked, so that the test can check both which
    tensors `.step()` passes to each of them and the values of the
    tensors it returns.
    """
    batch_size = 2
    timesteps = 10
    shortlist_size = 3
    output_size = shortlist_size + timesteps  # 13
    state_size = 9
    input_size = 11
    cell_out_size = 4
    cell_state_size = 7

    # DEFINE ATTENTION STATES AND (variable) DIMENSIONS.
    # The `states` variable, even if not used, is the reference
    # tensor for the dimensionality of the problem and represents
    # the attention states of the model.
    states = tf.placeholder(dtype=tf.float32, shape=[None, None, None])
    batch_dim = utils.get_dimension(states, 0)
    timesteps_dim = utils.get_dimension(states, 1)
    state_dim = utils.get_dimension(states, 2)
    output_dim = shortlist_size + timesteps_dim

    # RECURRENT CELL.
    out_cell_out = 8 * tf.ones(shape=tf.stack([batch_dim, cell_out_size]))
    out_cell_state = 14 * tf.ones(
        shape=tf.stack([batch_dim, cell_state_size]))
    cell = mock.Mock()
    cell.side_effect = [(out_cell_out, out_cell_state)]

    # LOCATION SOFTMAX (and attention).
    location = 12 * tf.ones(
        dtype=tf.float32, shape=[batch_dim, timesteps_dim])
    attention = 13 * tf.ones(
        dtype=tf.float32, shape=[batch_dim, state_dim])
    location_softmax = mock.Mock()
    location_softmax.attention.states = states
    location_softmax.side_effect = [(location, attention)]

    # OUTPUT.
    out_output = 9 * tf.ones(shape=tf.stack([batch_dim, output_dim]))
    pointing_output = mock.Mock()
    pointing_output.side_effect = [out_output]

    # INPUT TENSORS: time, inp, (cell_out, cell_state)
    in_time = tf.constant(0, dtype=tf.int32)
    in_inp = tf.ones(shape=tf.stack([batch_dim, input_size]))
    in_cell_out = 4 * tf.ones(shape=tf.stack([batch_dim, cell_out_size]))
    in_cell_state = 7 * tf.ones(
        shape=tf.stack([batch_dim, cell_state_size]))
    in_state = (in_cell_out, in_cell_state)
    in_step_args = (in_time, in_inp, in_state)

    # ACTUAL OUT TENSORS.
    decoder = layers.PointingSoftmaxDecoder(
        cell=cell,
        location_softmax=location_softmax,
        pointing_output=pointing_output,
        input_size=input_size)
    output_t, next_inp_t, next_state_t, finished_t = decoder.step(
        *in_step_args)
    next_cell_out_t, next_cell_state_t = next_state_t

    # TENSOR IDENTITY ASSERTIONS.
    # 1. Assert that the location and attention are calculated
    # with the previous step cell output tensor (in_cell_out).
    location_softmax.assert_called_once_with(in_cell_out)

    # 2. Assert that the cell state that has been passed to the inner
    # recurrent cell is the one coming from the previous step
    # (in_cell_state).
    # (apparently pylint doesn't recognize `cell` as a callable mock?)
    # pylint: disable=E1136
    cell_input_t, in_cell_state_t = tuple(cell.call_args[0])
    self.assertEqual(in_cell_state, in_cell_state_t)

    # 3. Assert that the pointing output has been invoked with the
    # output of the recurrent cell (out_cell_out), the location tensor
    # (location) and the attention context tensor (attention).
    pointing_output.assert_called_once_with(
        out_cell_out, location, attention)

    # Actualize the state.
    # (actually pylint doesn't recognize np.random.rand())
    # pylint: disable=E1101
    states_np = np.random.rand(batch_size, timesteps, state_size)

    # EXPECTED OUTPUT VALUES for the .step() method.
    output_exp = 9 * np.ones((batch_size, output_size))
    next_inp_exp = 9 * np.ones((batch_size, input_size))
    next_cell_out_exp = 8 * np.ones((batch_size, cell_out_size))
    next_cell_state_exp = 14 * np.ones((batch_size, cell_state_size))
    # The builtin `bool` is used as dtype: the `np.bool` alias is
    # deprecated and has been removed from recent NumPy releases.
    finished_exp = np.asarray([False] * batch_size, dtype=bool)

    # Re-build the recurrent cell input as the concatenation
    # of the cell output, the attention context vector and the
    # current input.
    cell_input_rebuilt_t = tf.concat(
        [in_cell_out, attention, in_inp], axis=1)

    feed = {states: states_np}
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # .step() outputs.
        self.assertAllEqual(output_exp, sess.run(output_t, feed))
        self.assertAllEqual(next_inp_exp, sess.run(next_inp_t, feed))
        self.assertAllEqual(
            next_cell_out_exp, sess.run(next_cell_out_t, feed))
        self.assertAllEqual(
            next_cell_state_exp, sess.run(next_cell_state_t, feed))
        self.assertAllEqual(finished_exp, sess.run(finished_t, feed))

        # recurrent cell input.
        cell_input_exp = sess.run(cell_input_rebuilt_t, feed)
        cell_input_act = sess.run(cell_input_t, feed)
        self.assertAllEqual(cell_input_exp, cell_input_act)
def _build_graph(self):
    """Build the model graph and store its predictions.

    The graph embeds the input words, encodes them with a dynamic RNN and
    decodes with a `PointingSoftmaxDecoder` (Bahdanau attention over the
    encoder outputs, location softmax and pointing softmax output) driven
    by a `DynamicDecoder`. The decoded output is stored in
    `self._predictions`.

    In training mode the decoder is fed with the one-hot encoded targets
    and neither the EOS symbol nor a padding length are used; otherwise
    no decoder inputs are given, the EOS symbol is `self.EOS_IDX` and the
    output is padded to dimension 1 of the targets.
    """
    is_training = self.mode == tf.contrib.learn.ModeKeys.TRAIN
    words = self.inputs.get(self.inputs.WORDS_KEY)
    sentence_lengths = self.inputs.get(self.inputs.SENTENCE_LENGTH_KEY)
    targets = self.inputs.get(self.inputs.FORMULA_KEY)
    formula_lengths = self.inputs.get(self.inputs.FORMULA_LENGTH_KEY)

    with self._graph.as_default():  # pylint: disable=E1129

        with tf.variable_scope('Embedding'):  # pylint: disable=E1129
            with tf.device('CPU:0'):
                emb_size = self._params['embedding_size']
                voc_size = self._params[self.INPUT_VOC_SIZE_PK]
                embedding_matrix = tf.get_variable(
                    'E', [voc_size, emb_size])
                embedded = tf.nn.embedding_lookup(embedding_matrix, words)

        batch_dim = utils.get_dimension(words, 0)

        with tf.variable_scope('Encoder'):  # pylint: disable=E1129
            enc_params = self._params['encoder']
            enc_cell_type = enc_params['cell.type']
            enc_cell_params = enc_params['cell.params']
            enc_cell = configurable.factory(
                enc_cell_type, self._mode, enc_cell_params, rnn)
            initial_state = enc_cell.zero_state(batch_dim, tf.float32)
            encoder_out, _ = tf.nn.dynamic_rnn(
                cell=enc_cell,
                initial_state=initial_state,
                inputs=embedded,
                sequence_length=sentence_lengths,
                parallel_iterations=self._params['parallel_iterations'])

        with tf.variable_scope('Decoder'):  # pylint: disable=E1129
            dec_params = self._params['decoder']
            dec_cell_type = dec_params['cell.type']
            dec_cell_params = dec_params['cell.params']
            dec_cell = configurable.factory(
                dec_cell_type, self._mode, dec_cell_params, rnn)

            attention = layers.BahdanauAttention(
                states=encoder_out,
                inner_size=self._params['attention_size'],
                trainable=is_training)
            location = layers.LocationSoftmax(
                attention=attention,
                sequence_length=sentence_lengths)
            shortlist_size = self._params[self.OUTPUT_VOC_SIZE_PK]
            output = layers.PointingSoftmaxOutput(
                shortlist_size=shortlist_size,
                decoder_out_size=dec_cell.output_size,
                state_size=encoder_out.shape[-1].value,
                trainable=is_training)

            # The decoder is fed with the one-hot targets only when
            # training; their depth is the output vocabulary size plus
            # dimension 1 of the words.
            decoder_inputs = None
            if is_training:
                location_size = utils.get_dimension(words, 1)
                one_hot_depth = (
                    self._params[self.OUTPUT_VOC_SIZE_PK] + location_size)
                decoder_inputs = tf.one_hot(
                    targets, one_hot_depth,
                    dtype=tf.float32,
                    name='decoder_training_input')
            self._decoder_inputs = decoder_inputs

            ps_decoder = layers.PointingSoftmaxDecoder(
                cell=dec_cell,
                location_softmax=location,
                pointing_output=output,
                input_size=self._params['feedback_size'],
                decoder_inputs=self._decoder_inputs,
                trainable=is_training)

            if is_training:
                eos, pad_to = None, None
            else:
                eos = self.EOS_IDX
                pad_to = utils.get_dimension(targets, 1)

            helper = layers.TerminationHelper(
                lengths=formula_lengths, EOS=eos)
            decoder = layers.DynamicDecoder(
                decoder=ps_decoder,
                helper=helper,
                pad_to=pad_to,
                parallel_iterations=self._params['parallel_iterations'],
                swap_memory=False)
            self._predictions, _ = decoder.decode()
def _build_graph(self):
    """Build the model graph and store its predictions.

    The graph embeds the input words, encodes them with a dynamic RNN and
    decodes with a `PointingSoftmaxDecoder` (Bahdanau attention over the
    encoder outputs, location softmax and pointing softmax output) driven
    by a `DynamicDecoder`. The decoded output is stored in
    `self._predictions`.

    When a seed is configured it is set as the graph-level random seed
    before any other operation is created.

    In training mode the decoder is fed with the one-hot encoded targets
    and neither the EOS symbol nor a padding length are used; otherwise
    no decoder inputs are given, the EOS symbol is `self.EOS_IDX` and the
    output is padded to dimension 1 of the targets.
    """
    trainable = self.mode == tf.contrib.learn.ModeKeys.TRAIN
    words = self.inputs.get(self.inputs.WORDS_KEY)
    slengths = self.inputs.get(self.inputs.SENTENCE_LENGTH_KEY)
    targets = self.inputs.get(self.inputs.FORMULA_KEY)
    flengths = self.inputs.get(self.inputs.FORMULA_LENGTH_KEY)

    with self._graph.as_default():  # pylint: disable=E1129
        # `0` is a valid graph-level seed: test against None rather than
        # for truthiness, so that a seed of 0 is not silently ignored.
        if self._seed is not None:
            tf.set_random_seed(self._seed)

        with tf.variable_scope('Embedding'):  # pylint: disable=E1129
            with tf.device('CPU:0'):
                embedding_size = self._params['embedding_size']
                vocabulary_size = self._params[self.INPUT_VOC_SIZE_PK]
                embeddings = tf.get_variable(
                    'E', [vocabulary_size, embedding_size])
                inputs = tf.nn.embedding_lookup(embeddings, words)

        batch_dim = utils.get_dimension(words, 0)

        with tf.variable_scope('Encoder'):  # pylint: disable=E1129
            encoder_params = self._params['encoder']
            encoder_cell_type = encoder_params['cell.type']
            encoder_cell_params = encoder_params['cell.params']
            encoder_cell = configurable.factory(
                encoder_cell_type, self._mode, encoder_cell_params, rnn)
            state = encoder_cell.zero_state(batch_dim, tf.float32)
            encoder_out, _ = tf.nn.dynamic_rnn(
                cell=encoder_cell,
                initial_state=state,
                inputs=inputs,
                sequence_length=slengths,
                parallel_iterations=self._params['parallel_iterations'])

        with tf.variable_scope('Decoder'):  # pylint: disable=E1129
            decoder_params = self._params['decoder']
            decoder_cell_type = decoder_params['cell.type']
            decoder_cell_params = decoder_params['cell.params']
            decoder_cell = configurable.factory(
                decoder_cell_type, self._mode, decoder_cell_params, rnn)

            attention = layers.BahdanauAttention(
                states=encoder_out,
                inner_size=self._params['attention_size'],
                trainable=trainable)
            location = layers.LocationSoftmax(
                attention=attention,
                sequence_length=slengths)
            output = layers.PointingSoftmaxOutput(
                shortlist_size=self._params[self.OUTPUT_VOC_SIZE_PK],
                decoder_out_size=decoder_cell.output_size,
                state_size=encoder_out.shape[-1].value,
                trainable=trainable)

            # The decoder is fed with the one-hot targets only when
            # training; their depth is the output vocabulary size plus
            # dimension 1 of the words.
            self._decoder_inputs = None
            if trainable:
                location_size = utils.get_dimension(words, 1)
                output_size = (
                    self._params[self.OUTPUT_VOC_SIZE_PK] + location_size)
                self._decoder_inputs = tf.one_hot(
                    targets, output_size,
                    dtype=tf.float32,
                    name='decoder_training_input')

            ps_decoder = layers.PointingSoftmaxDecoder(
                cell=decoder_cell,
                location_softmax=location,
                pointing_output=output,
                input_size=self._params['feedback_size'],
                decoder_inputs=self._decoder_inputs,
                trainable=trainable)

            # The EOS symbol and the padding length are only used outside
            # of training.
            eos = None if trainable else self.EOS_IDX
            pad_to = None if trainable else utils.get_dimension(targets, 1)
            helper = layers.TerminationHelper(lengths=flengths, EOS=eos)
            decoder = layers.DynamicDecoder(
                decoder=ps_decoder,
                helper=helper,
                pad_to=pad_to,
                parallel_iterations=self._params['parallel_iterations'],
                swap_memory=False)
            self._predictions, _ = decoder.decode()