def AddFCLayer(self, prev_layer, index, reuse=None):
    """Parses a fully connected layer spec and adds the layer to the graph.

    The spec is matched against self.model_str starting at `index` and has the
    form F<non-linearity>[{name}]<depth>, where the non-linearity code is one
    of s, t, r, l or m.

    Args:
      prev_layer: Input tensor.
      index:      Position in model_str at which to start parsing.
      reuse:      Forwarded as `reuse` to slim.fully_connected.

    Returns:
      Tuple of (output tensor, end index in model_str), or (None, None) if the
      text at `index` is not a fully connected layer spec.
    """
    match = re.compile(R'(F)(s|t|r|l|m)({\w+})?(\d+)').match(
        self.model_str, index)
    if match is None:
        return None, None

    spec_text, nonlin_code, name_spec, depth_text = match.group(0, 2, 3, 4)
    activation = self._NonLinearity(nonlin_code)
    name = self._GetLayerName(spec_text, index, name_spec)
    depth = int(depth_text)

    # Height, width and depth are all folded into a single depth axis, so each
    # of them has to be known here.
    flat_depth = shapes.tensor_dim(prev_layer, 1) * shapes.tensor_dim(
        prev_layer, 2)
    flat_depth = flat_depth * shapes.tensor_dim(prev_layer, 3)

    # slim's fully connected layer wants a [batch, depth] input, so collapse
    # everything but the batch axis before applying it.
    flattened = tf.reshape(
        prev_layer, [-1, flat_depth], name=name + '_reshape_in')
    fc_out = slim.fully_connected(
        flattened, depth, activation_fn=activation, scope=name, reuse=reuse)

    # Height and width have been collapsed to 1, so their reduction factors no
    # longer apply.
    for axis in (1, 2):
        self.reduction_factors[axis] = None

    batch = shapes.tensor_dim(prev_layer, 0)
    restored = tf.reshape(
        fc_out, [batch, 1, 1, depth], name=name + '_reshape_out')
    return restored, match.end()
def _TFLSTMLayer(self, prev_layer, direction, dim, summarize, depth, name,
                 reuse=None):
    """Adds an LSTM layer built with nn_ops.tfrnn_helper.

    The 4-D input is flattened to 3-D for the recurrent op and reshaped back
    to 4-D afterwards.

    Args:
      prev_layer: Input tensor.
      direction:  Passed to tfrnn_helper; 'bidirectional' doubles the output
        depth.
      dim:        'x' runs over axis 2 directly; any other value swaps axes 1
        and 2 on the way in, and 'y' swaps them back on the way out.
      summarize:  If true, only the slice at the final step index is kept.
      depth:      Number of LSTM nodes.
      name:       Base name for the ops created here.
      reuse:      Forwarded as `reuse` to tfrnn_helper.

    Returns:
      Output tensor.
    """
    if dim != 'x':
        # Any other target dimension is handled by swapping axes 1 and 2.
        lengths = self.GetLengths(1, 1)
        inputs = tf.transpose(
            prev_layer, [0, 2, 1, 3], name=name + '_ytrans_in')
    else:
        lengths = self.GetLengths(2, 1)
        inputs = prev_layer

    input_batch = shapes.tensor_dim(inputs, 0)
    num_slices = shapes.tensor_dim(inputs, 1)
    num_steps = shapes.tensor_dim(inputs, 2)
    input_depth = shapes.tensor_dim(inputs, 3)

    # Fold the non-time spatial axis into the batch axis.
    inputs = tf.reshape(
        inputs, [-1, num_steps, input_depth], name=name + '_reshape_in')

    # The lengths must be repeated to cover the folded axis and any earlier
    # change to the batch dimension.
    sequence_count = tf.to_float(input_batch * num_slices)
    length_count = tf.to_float(tf.shape(lengths)[0])
    repeats = tf.cast(sequence_count / length_count, tf.int32)
    lengths = tf.cast(tf.tile(lengths, [repeats]), tf.int64)

    outputs = nn_ops.tfrnn_helper(
        inputs,
        lengths,
        cell_type='lstm',
        num_nodes=depth,
        direction=direction,
        name=name,
        reuse=reuse)

    # A bi-directional layer produces twice the depth.
    output_depth = depth * 2 if direction == 'bidirectional' else depth

    # Unfold the batch axis again; summarizing leaves a single step.
    if summarize:
        outputs = tf.slice(
            outputs, [0, num_steps - 1, 0], [-1, 1, -1],
            name=name + '_sum_slice')
        steps_out = 1
    else:
        steps_out = num_steps
    outputs = tf.reshape(
        outputs, [input_batch, num_slices, steps_out, output_depth],
        name=name + '_reshape_out')

    if dim == 'y':
        outputs = tf.transpose(
            outputs, [0, 2, 1, 3], name=name + '_ytrans_out')
    return outputs
def _AddOutputLayer(self, prev_layer, out_dims, out_func, num_classes,
                    reuse=None, sc=None):
    """Adds the fully connected output layer and its softmax.

    Args:
      prev_layer:  Input tensor.
      out_dims:    2 keeps height and width in the output, 1 merges them into
        a single axis, and any falsy value folds them into the depth.
      out_func:    Output function code; 'l' is rejected.
      num_classes: Number of output classes.
      reuse:       Forwarded as `reuse` to slim.fully_connected.
      sc:          Forwarded as `scope` to slim.fully_connected.

    Returns:
      Tuple of (logits, output), both reshaped to the requested output shape.

    Raises:
      ValueError: If out_func is 'l'.
    """
    batch_in, height_in, width_in, depth_in = (
        shapes.tensor_dim(prev_layer, dim=axis) for axis in range(4))

    if not out_dims:
        # Only the batch axis survives, so height and width must be known in
        # order to fold them into the depth.
        flat_depth = height_in * width_in * depth_in
    else:
        # Remaining height and width ride along with the batch axis and are
        # unpacked again by the final reshape.
        flat_depth = depth_in
    flattened = tf.reshape(prev_layer, [-1, flat_depth])

    logits = slim.fully_connected(
        flattened, num_classes, activation_fn=None, reuse=reuse, scope=sc)
    if out_func == 'l':
        raise ValueError('Logistic not yet supported!')
    output = tf.nn.softmax(logits)

    # Pick the final shape that matches the requested output dimensionality.
    if out_dims == 2:
        final_shape = [batch_in, height_in, width_in, num_classes]
    elif out_dims == 1:
        final_shape = [batch_in, height_in * width_in, num_classes]
    else:
        final_shape = [batch_in, num_classes]

    output = tf.reshape(output, final_shape, name='Output')
    logits = tf.reshape(logits, final_shape, name='Logits')
    return logits, output
def _AddOutputs(self, prev_layer, out_dims, out_func, num_classes,
                reuse=None, sc=None):
    """Builds the output layer and records its tensors on this object.

    Sets self.ctc_width, self.logits and self.output.

    Args:
      prev_layer:  Input tensor.
      out_dims:    Forwarded to _AddOutputLayer.
      out_func:    Forwarded to _AddOutputLayer.
      num_classes: Forwarded to _AddOutputLayer.
      reuse:       Forwarded to _AddOutputLayer.
      sc:          Forwarded to _AddOutputLayer.
    """
    layer_logits, layer_output = self._AddOutputLayer(
        prev_layer, out_dims, out_func, num_classes, reuse=reuse, sc=sc)

    # The lengths along dim 2 are requested with the input height as factor.
    input_height = shapes.tensor_dim(prev_layer, dim=1)
    self.ctc_width = self.layers.GetLengths(dim=2, factor=input_height)

    self.logits, self.output = layer_logits, layer_output
def _LSTMLayer(self, prev_layer, direction, dim, summarize, depth, name,
               reuse=None):
    """Adds an LSTM layer from already-parsed attributes.

    The result is 4-D for a 4-D input whether or not summarize is set.

    Args:
      prev_layer: Input tensor.
      direction:  'forward' 'backward' or 'bidirectional'
      dim:        'x' or 'y', the dimension to treat as time.
      summarize:  True to return only the last timestep.
      depth:      Output depth.
      name:       Some string naming the op.
      reuse:      Forwarded as `reuse` to nn_ops.rnn_helper.

    Returns:
      Output tensor.
    """
    along_x = dim == 'x'
    if along_x:
        lengths = self.GetLengths(2, 1)
        inputs = prev_layer
    else:
        # Time runs along y, so swap height and width first.
        lengths = self.GetLengths(1, 1)
        inputs = tf.transpose(
            prev_layer, [0, 2, 1, 3], name=name + '_ytrans_in')

    input_batch = shapes.tensor_dim(inputs, 0)
    num_slices = shapes.tensor_dim(inputs, 1)
    num_steps = shapes.tensor_dim(inputs, 2)
    input_depth = shapes.tensor_dim(inputs, 3)

    # Merge the other spatial dimension into the batch.
    merged_shape = [-1, num_steps, input_depth]
    inputs = tf.reshape(inputs, merged_shape, name=name + '_reshape_in')

    # Replicate the lengths to account for the merged dimension and for any
    # change already made to the batch dimension.
    merged_batch = tf.to_float(input_batch * num_slices)
    known_lengths = tf.to_float(tf.shape(lengths)[0])
    tile_count = tf.cast(merged_batch / known_lengths, tf.int32)
    lengths = tf.tile(lengths, [tile_count])
    lengths = tf.cast(lengths, tf.int64)

    outputs = nn_ops.rnn_helper(
        inputs,
        lengths,
        cell_type='lstm',
        num_nodes=depth,
        direction=direction,
        name=name,
        stddev=0.1,
        reuse=reuse)

    # Both directions are concatenated when bi-directional.
    output_depth = depth
    if direction == 'bidirectional':
        output_depth = depth * 2

    # Split the merged dimension back out of the batch.
    kept_steps = num_steps
    if summarize:
        outputs = tf.slice(
            outputs, [0, num_steps - 1, 0], [-1, 1, -1],
            name=name + '_sum_slice')
        kept_steps = 1
    outputs = tf.reshape(
        outputs, [input_batch, num_slices, kept_steps, output_depth],
        name=name + '_reshape_out')

    if dim == 'y':
        outputs = tf.transpose(
            outputs, [0, 2, 1, 3], name=name + '_ytrans_out')
    return outputs
def lstm_layer(inp,
               length=None,
               state=None,
               memory=None,
               num_nodes=None,
               backward=False,
               clip=100.0,
               reg_func=tf.nn.l2_loss,
               weight_reg=False,
               weight_collection="LSTMWeights",
               bias_reg=False,
               stddev=None,
               seed=None,
               decode=False,
               use_native_weights=False,
               name=None,
               reuse=None):
    """Adds the ops for a single-direction LSTM layer.

    The pipeline is:
      input => (forward-LSTM|backward-LSTM) => output
    `backward` selects the direction: the forward LSTM when false, the
    backward LSTM when true.

    Args:
      inp: A 3-D tensor of shape [`batch_size`, `max_length`, `feature_dim`].
      length: A 1-D tensor of shape [`batch_size`] and type int64. Each
        element is the length of the corresponding sequence in `inp`.
      state: Initial state; zero-filled if not given.
      memory: Initial memory; zero-filled if not given.
      num_nodes: The number of LSTM cells.
      backward: If true, `inp` is reversed before the LSTM ops are added and
        the output is reversed again so it runs in the same direction as
        `inp`.
      clip: Value used to clip the cell values.
      reg_func: Function used for the weight regularization such as
        `tf.nn.l2_loss`.
      weight_reg: If true, regularize the weights with `reg_func`.
      weight_collection: Collection that receives the regularization terms.
      bias_reg: If true, regularize the bias vector with `reg_func`.
      stddev: Standard deviation for a truncated normal initializer. A falsy
        value selects the uniform unit scaling initializer instead.
      seed: Seed used to initialize the variables.
      decode: If true, the regularization ops are not added.
      use_native_weights: If true, the weights are stored as a single "W_0"
        matrix and a "B" vector under an "LSTMCell" scope and sliced into the
        layout used here.
      name: Name of the variable scope holding the layer.
      reuse: Forwarded as `reuse` to the variable scope.

    Returns:
      A tuple (out, mem) holding the first and third values returned by
      rnn.variable_lstm. `out` has been reversed back when `backward` is set.
    """

    def _reverse_time(tensor):
        # Reverse along axis 1, respecting per-sequence lengths when known.
        if length is None:
            return tf.reverse(tensor, [1])
        return tf.reverse_sequence(tensor, length, 1, 0)

    regularize = not decode

    with tf.variable_scope(name, reuse=reuse):
        if backward:
            inp = _reverse_time(inp)

        num_prev = inp.get_shape()[2]
        if stddev:
            init = tf.truncated_normal_initializer(stddev=stddev, seed=seed)
        else:
            init = tf.uniform_unit_scaling_initializer(seed=seed)
        gate_width = 4 * num_nodes

        if use_native_weights:
            # One packed matrix: input weights on top, recurrent ones below.
            with tf.variable_scope("LSTMCell"):
                packed = tf.get_variable(
                    "W_0",
                    shape=[num_prev + num_nodes, gate_width],
                    initializer=init,
                    dtype=tf.float32)
                w_i_m = tf.slice(
                    packed, [0, 0], [num_prev, gate_width], name="w_i_m")
                recurrent_rows = tf.slice(
                    packed, [num_prev, 0], [num_nodes, gate_width])
                w_m_m = tf.reshape(
                    recurrent_rows, [num_nodes, 4, num_nodes], name="w_m_m")
        else:
            w_i_m = tf.get_variable(
                "w_i_m", [num_prev, gate_width], initializer=init)
            w_m_m = tf.get_variable(
                "w_m_m", [num_nodes, 4, num_nodes], initializer=init)

        if regularize and weight_reg:
            tf.add_to_collection(
                weight_collection, reg_func(w_i_m, name="w_i_m_reg"))
            tf.add_to_collection(
                weight_collection, reg_func(w_m_m, name="w_m_m_reg"))

        batch_size = shapes.tensor_dim(inp, dim=0)
        num_frames = shapes.tensor_dim(inp, dim=1)
        # Flatten batch and time so the input projection is one matmul.
        projected = tf.reshape(
            inp, tf.stack([batch_size * num_frames, num_prev]))

        if use_native_weights:
            with tf.variable_scope("LSTMCell"):
                native_bias = tf.get_variable(
                    "B",
                    shape=[gate_width],
                    initializer=tf.zeros_initializer(),
                    dtype=tf.float32)
                biases = tf.identity(native_bias, name="biases")
        else:
            biases = tf.get_variable(
                "biases", [gate_width],
                initializer=tf.constant_initializer(0.0))

        if regularize and bias_reg:
            tf.add_to_collection(
                weight_collection, reg_func(biases, name="biases_reg"))

        projected = tf.nn.xw_plus_b(projected, w_i_m, biases)
        projected = tf.reshape(
            projected, tf.stack([batch_size, num_frames, 4, num_nodes]))

        # Missing initial state or memory defaults to zeros.
        if state is None:
            state = tf.fill(tf.stack([batch_size, num_nodes]), 0.0)
        if memory is None:
            memory = tf.fill(tf.stack([batch_size, num_nodes]), 0.0)

        out, _, mem = rnn.variable_lstm(
            projected, state, memory, w_m_m, clip=clip)

        if backward:
            out = _reverse_time(out)

    return out, mem