Example #1
 def AddFCLayer(self, prev_layer, index, reuse=None):
     """Parse expression and add Fully Connected Layer.
 
     Args:
       prev_layer: Input tensor.
       index:      Position in model_str to start parsing.
       reuse:      Whether to reuse variables in the layer scope.
 
     Returns:
       Output tensor, end index in model_str.
     """
     pattern = re.compile(r'(F)(s|t|r|l|m)({\w+})?(\d+)')
     m = pattern.match(self.model_str, index)
     if m is None:
         return None, None
     fn = self._NonLinearity(m.group(2))
     name = self._GetLayerName(m.group(0), index, m.group(3))
     depth = int(m.group(4))
     input_depth = shapes.tensor_dim(prev_layer, 1) * shapes.tensor_dim(
         prev_layer, 2) * shapes.tensor_dim(prev_layer, 3)
     # The slim fully connected is actually a 1x1 conv, so we have to crush the
     # dimensions on input.
     # Everything except batch goes to depth, and therefore has to be known.
     shaped = tf.reshape(
         prev_layer, [-1, input_depth], name=name + '_reshape_in')
     output = slim.fully_connected(
         shaped, depth, activation_fn=fn, scope=name, reuse=reuse)
     # Width and height are collapsed to 1.
     self.reduction_factors[1] = None
     self.reduction_factors[2] = None
     return tf.reshape(
         output, [shapes.tensor_dim(prev_layer, 0), 1, 1, depth],
         name=name + '_reshape_out'), m.end()
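As a quick, hedged illustration (the spec string 'Fr64' is assumed for demonstration and is not taken from the source), the pattern above tokenizes a fully-connected spec like this:

import re

# Same pattern as in AddFCLayer: 'F', a non-linearity code, an optional
# '{name}' annotation, and the output depth.
pattern = re.compile(r'(F)(s|t|r|l|m)({\w+})?(\d+)')

m = pattern.match('Fr64', 0)
print(m.group(2))  # 'r'   -> non-linearity selector passed to _NonLinearity
print(m.group(3))  # None  -> no explicit '{name}' annotation
print(m.group(4))  # '64'  -> output depth
print(m.end())     # 4     -> index where parsing of model_str resumes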
Example #2
    def _TFLSTMLayer(self, prev_layer, direction, dim, summarize, depth, name, reuse=None):
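        """Adds an LSTM layer via nn_ops.tfrnn_helper with pre-parsed attributes.

        Always maps 4-D to 4-D regardless of summarize.
        Args:
          prev_layer: Input tensor.
          direction:  'forward', 'backward' or 'bidirectional'.
          dim:        'x' or 'y', dimension to consider as time.
          summarize:  True if we are to return only the last timestep.
          depth:      Output depth.
          name:       Some string naming the op.
          reuse:      Whether to reuse variables in the layer scope.

        Returns:
          Output tensor.
        """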
        # If the target dimension is y, we need to transpose.
        if dim == 'x':
            lengths = self.GetLengths(2, 1)
            inputs = prev_layer
        else:
            lengths = self.GetLengths(1, 1)
            inputs = tf.transpose(prev_layer, [0, 2, 1, 3], name=name + '_ytrans_in')
        input_batch = shapes.tensor_dim(inputs, 0)
        num_slices = shapes.tensor_dim(inputs, 1)
        num_steps = shapes.tensor_dim(inputs, 2)
        input_depth = shapes.tensor_dim(inputs, 3)

        # Reshape away the other dimension.
        inputs = tf.reshape(
            inputs, [-1, num_steps, input_depth], name=name + '_reshape_in')
        # We need to replicate the lengths by the size of the other dimension, and
        # any changes that have been made to the batch dimension.
        tile_factor = tf.to_float(input_batch *
                                  num_slices) / tf.to_float(tf.shape(lengths)[0])
        lengths = tf.tile(lengths, [tf.cast(tile_factor, tf.int32)])
        lengths = tf.cast(lengths, tf.int64)
        outputs = nn_ops.tfrnn_helper(
            inputs,
            lengths,
            cell_type='lstm',
            num_nodes=depth,
            direction=direction,
            name=name,
            reuse=reuse
        )
        # Output depth is doubled if bi-directional.
        if direction == 'bidirectional':
            output_depth = depth * 2
        else:
            output_depth = depth
        # Restore the other dimension.
        if summarize:
            outputs = tf.slice(
                outputs, [0, num_steps - 1, 0], [-1, 1, -1], name=name + '_sum_slice')
            outputs = tf.reshape(
                outputs, [input_batch, num_slices, 1, output_depth],
                name=name + '_reshape_out')
        else:
            outputs = tf.reshape(
                outputs, [input_batch, num_slices, num_steps, output_depth],
                name=name + '_reshape_out')
        if dim == 'y':
            outputs = tf.transpose(outputs, [0, 2, 1, 3], name=name + '_ytrans_out')
        return outputs
    def _AddOutputLayer(self,
                        prev_layer,
                        out_dims,
                        out_func,
                        num_classes,
                        reuse=None,
                        sc=None):
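        """Adds a fully-connected output layer yielding logits and a softmax.

        Args:
          prev_layer:  Input tensor.
          out_dims:    Number of output dimensions to keep (0, 1 or 2).
          out_func:    'l' for logistic (not yet supported); softmax otherwise.
          num_classes: Number of output classes.
          reuse:       Whether to reuse variables in the layer scope.
          sc:          Variable scope for the fully-connected layer.

        Returns:
          logits, output tensors, reshaped according to out_dims.
        """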
        # Reduce dimensionality appropriate to the output dimensions.
        batch_in = shapes.tensor_dim(prev_layer, dim=0)
        height_in = shapes.tensor_dim(prev_layer, dim=1)
        width_in = shapes.tensor_dim(prev_layer, dim=2)
        depth_in = shapes.tensor_dim(prev_layer, dim=3)
        if out_dims:
            # Combine any remaining height and width with batch and unpack after.
            shaped = tf.reshape(prev_layer, [-1, depth_in])
        else:
            # Everything except batch goes to depth, and therefore has to be known.
            shaped = tf.reshape(prev_layer,
                                [-1, height_in * width_in * depth_in])
        logits = slim.fully_connected(shaped,
                                      num_classes,
                                      activation_fn=None,
                                      reuse=reuse,
                                      scope=sc)
        if out_func == 'l':
            raise ValueError('Logistic not yet supported!')
        else:
            output = tf.nn.softmax(logits)
        # Reshape to the desired output.
        if out_dims == 2:
            output_shape = [batch_in, height_in, width_in, num_classes]
        elif out_dims == 1:
            output_shape = [batch_in, height_in * width_in, num_classes]
        else:
            output_shape = [batch_in, num_classes]

        output = tf.reshape(output, output_shape, name='Output')
        logits = tf.reshape(logits, output_shape, name='Logits')
        return logits, output
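A minimal sketch (illustrative shapes only, not from the source) of how out_dims in _AddOutputLayer above selects the final output shape:

def output_shape(out_dims, batch, height, width, num_classes):
    # Mirrors the shape selection at the end of _AddOutputLayer.
    if out_dims == 2:
        return [batch, height, width, num_classes]    # per-position predictions
    elif out_dims == 1:
        return [batch, height * width, num_classes]   # one sequence per image
    return [batch, num_classes]                        # single prediction

print(output_shape(2, 8, 16, 32, 10))  # [8, 16, 32, 10]
print(output_shape(1, 8, 16, 32, 10))  # [8, 512, 10]
print(output_shape(0, 8, 16, 32, 10))  # [8, 10]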
 def _AddOutputs(self,
                 prev_layer,
                 out_dims,
                 out_func,
                 num_classes,
                 reuse=None,
                 sc=None):
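     """Adds the output layer and records logits, output and CTC widths.

     Args:
       prev_layer:  Input tensor.
       out_dims:    Number of output dimensions to keep (0, 1 or 2).
       out_func:    Output non-linearity selector forwarded to _AddOutputLayer.
       num_classes: Number of output classes.
       reuse:       Whether to reuse variables in the layer scope.
       sc:          Variable scope for the output layer.
     """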
     logits, outputs = self._AddOutputLayer(prev_layer,
                                            out_dims,
                                            out_func,
                                            num_classes,
                                            reuse=reuse,
                                            sc=sc)
     height_in = shapes.tensor_dim(prev_layer, dim=1)
     self.ctc_width = self.layers.GetLengths(dim=2, factor=height_in)
     self.logits = logits
     self.output = outputs
Example #5
 def _LSTMLayer(self, prev_layer, direction, dim, summarize, depth, name, reuse=None):
     """Adds an LSTM layer with the given pre-parsed attributes.
 
     Always maps 4-D to 4-D regardless of summarize.
     Args:
       prev_layer: Input tensor.
       direction:  'forward', 'backward' or 'bidirectional'.
       dim:        'x' or 'y', dimension to consider as time.
       summarize:  True if we are to return only the last timestep.
       depth:      Output depth.
       name:       Some string naming the op.
       reuse:      Whether to reuse variables in the layer scope.
 
     Returns:
       Output tensor.
     """
     # If the target dimension is y, we need to transpose.
     if dim == 'x':
         lengths = self.GetLengths(2, 1)
         inputs = prev_layer
     else:
         lengths = self.GetLengths(1, 1)
         inputs = tf.transpose(prev_layer, [0, 2, 1, 3], name=name + '_ytrans_in')
     input_batch = shapes.tensor_dim(inputs, 0)
     num_slices = shapes.tensor_dim(inputs, 1)
     num_steps = shapes.tensor_dim(inputs, 2)
     input_depth = shapes.tensor_dim(inputs, 3)
     # Reshape away the other dimension.
     inputs = tf.reshape(
         inputs, [-1, num_steps, input_depth], name=name + '_reshape_in')
     # We need to replicate the lengths by the size of the other dimension, and
     # any changes that have been made to the batch dimension.
     tile_factor = tf.to_float(input_batch *
                               num_slices) / tf.to_float(tf.shape(lengths)[0])
     lengths = tf.tile(lengths, [tf.cast(tile_factor, tf.int32)])
     lengths = tf.cast(lengths, tf.int64)
     outputs = nn_ops.rnn_helper(
         inputs,
         lengths,
         cell_type='lstm',
         num_nodes=depth,
         direction=direction,
         name=name,
         stddev=0.1,
         reuse=reuse
     )
     # Output depth is doubled if bi-directional.
     if direction == 'bidirectional':
         output_depth = depth * 2
     else:
         output_depth = depth
     # Restore the other dimension.
     if summarize:
         outputs = tf.slice(
             outputs, [0, num_steps - 1, 0], [-1, 1, -1], name=name + '_sum_slice')
         outputs = tf.reshape(
             outputs, [input_batch, num_slices, 1, output_depth],
             name=name + '_reshape_out')
     else:
         outputs = tf.reshape(
             outputs, [input_batch, num_slices, num_steps, output_depth],
             name=name + '_reshape_out')
     if dim == 'y':
         outputs = tf.transpose(outputs, [0, 2, 1, 3], name=name + '_ytrans_out')
     return outputs
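The lengths tiling above (also used in _TFLSTMLayer) is easier to see with concrete numbers; the values below are assumed purely for illustration:

import numpy as np

batch, slices, steps = 2, 3, 5      # input is [batch, slices, steps, depth]
lengths = np.array([5, 4])          # one sequence length per batch element

# After the reshape, the RNN sees batch * slices independent sequences, so
# the per-image lengths are tiled up to that many entries.
tile_factor = (batch * slices) // lengths.shape[0]   # 3
tiled = np.tile(lengths, tile_factor)
print(tiled)  # [5 4 5 4 5 4] -> batch * slices length entries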
Example #6
def lstm_layer(inp,
               length=None,
               state=None,
               memory=None,
               num_nodes=None,
               backward=False,
               clip=100.0,
               reg_func=tf.nn.l2_loss,
               weight_reg=False,
               weight_collection="LSTMWeights",
               bias_reg=False,
               stddev=None,
               seed=None,
               decode=False,
               use_native_weights=False,
               name=None,
               reuse=None):
    """Adds ops for an LSTM layer.

    This adds ops for the following operations:

      input => (forward-LSTM|backward-LSTM) => output

    The direction of the LSTM is determined by `backward`: if it is false, the
    forward LSTM is used; otherwise, the backward one.

    Args:
      inp: A 3-D tensor of shape [`batch_size`, `max_length`, `feature_dim`].
      length: A 1-D tensor of shape [`batch_size`] and type int64. Each element
              represents the length of the corresponding sequence in `inp`.
      state: If specified, uses it as the initial state.
      memory: If specified, uses it as the initial memory.
      num_nodes: The number of LSTM cells.
      backward: If true, reverses the `inp` before adding the ops. The output is
                also reversed so that the direction is the same as `inp`.
      clip: Value used to clip the cell values.
      reg_func: Function used for the weight regularization such as
                `tf.nn.l2_loss`.
      weight_reg: If true, regularize the filter weights with `reg_func`.
      weight_collection: Collection to add the weights to for regularization.
      bias_reg: If true, regularize the bias vector with `reg_func`.
      stddev: Standard deviation used to initialize the variables.
      seed: Seed used to initialize the variables.
      decode: If true, does not add ops which are not used for inference.
      use_native_weights: If true, uses weights in the same format as the native
                          implementations.
      name: Name of the op.
      reuse: Whether to reuse variables in the enclosing variable scope.

    Returns:
      A tuple `(out, mem)`: `out` is a 3-D tensor of shape
      [`batch_size`, `max_length`, `num_nodes`] and `mem` is the memory state
      returned by `rnn.variable_lstm`.
    """
    with tf.variable_scope(name, reuse=reuse):
        if backward:
            if length is None:
                inp = tf.reverse(inp, [1])
            else:
                inp = tf.reverse_sequence(inp, length, 1, 0)

        num_prev = inp.get_shape()[2]
        if stddev:
            initializer = tf.truncated_normal_initializer(stddev=stddev,
                                                          seed=seed)
        else:
            initializer = tf.uniform_unit_scaling_initializer(seed=seed)

        if use_native_weights:
            with tf.variable_scope("LSTMCell"):
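                # The native LSTMCell format packs the input-to-gate and the
                # recurrent (memory-to-gate) weights into one
                # [num_prev + num_nodes, 4 * num_nodes] matrix; the slices
                # below separate them again.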
                w = tf.get_variable(
                    "W_0",
                    shape=[num_prev + num_nodes, 4 * num_nodes],
                    initializer=initializer,
                    dtype=tf.float32)
                w_i_m = tf.slice(w, [0, 0], [num_prev, 4 * num_nodes],
                                 name="w_i_m")
                w_m_m = tf.reshape(tf.slice(w, [num_prev, 0],
                                            [num_nodes, 4 * num_nodes]),
                                   [num_nodes, 4, num_nodes],
                                   name="w_m_m")
        else:
            w_i_m = tf.get_variable("w_i_m", [num_prev, 4 * num_nodes],
                                    initializer=initializer)
            w_m_m = tf.get_variable("w_m_m", [num_nodes, 4, num_nodes],
                                    initializer=initializer)

        if not decode and weight_reg:
            tf.add_to_collection(weight_collection,
                                 reg_func(w_i_m, name="w_i_m_reg"))
            tf.add_to_collection(weight_collection,
                                 reg_func(w_m_m, name="w_m_m_reg"))

        batch_size = shapes.tensor_dim(inp, dim=0)
        num_frames = shapes.tensor_dim(inp, dim=1)
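        # Flatten batch and time so the input projection below is one matmul.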
        prev = tf.reshape(inp, tf.stack([batch_size * num_frames, num_prev]))

        if use_native_weights:
            with tf.variable_scope("LSTMCell"):
                b = tf.get_variable("B",
                                    shape=[4 * num_nodes],
                                    initializer=tf.zeros_initializer(),
                                    dtype=tf.float32)
            biases = tf.identity(b, name="biases")
        else:
            biases = tf.get_variable("biases", [4 * num_nodes],
                                     initializer=tf.constant_initializer(0.0))
        if not decode and bias_reg:
            tf.add_to_collection(weight_collection,
                                 reg_func(biases, name="biases_reg"))
        prev = tf.nn.xw_plus_b(prev, w_i_m, biases)

        prev = tf.reshape(prev,
                          tf.stack([batch_size, num_frames, 4, num_nodes]))
        if state is None:
            state = tf.fill(tf.stack([batch_size, num_nodes]), 0.0)
        if memory is None:
            memory = tf.fill(tf.stack([batch_size, num_nodes]), 0.0)

        out, _, mem = rnn.variable_lstm(prev, state, memory, w_m_m, clip=clip)

        if backward:
            if length is None:
                out = tf.reverse(out, [1])
            else:
                out = tf.reverse_sequence(out, length, 1, 0)

    return out, mem
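A hypothetical call site for lstm_layer; the import path, placeholder shapes and hyper-parameters below are assumptions for illustration (TF 1.x style, matching the code above), not part of the source:

import tensorflow as tf
from nn_ops import lstm_layer  # assumed import path; adjust to your layout

# inp: [batch_size=4, max_length=20, feature_dim=32]
inp = tf.placeholder(tf.float32, [4, 20, 32])
length = tf.constant([20, 18, 20, 7], dtype=tf.int64)

out, mem = lstm_layer(inp,
                      length=length,
                      num_nodes=64,
                      backward=False,
                      stddev=0.1,
                      name='lstm_fw')
# out: [4, 20, 64]; mem: memory state returned by rnn.variable_lstm.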