def reduce_to_final(images, num_filters_out, nhidden=None, scope=None):
    """Reduce an image batch to one vector per image with two LSTM passes.

    The first pass treats the height axis as the step axis and keeps only
    the last-step result for every (image, column) pair. The second pass
    then steps along the width axis over those results and again keeps only
    the last-step result.

    Args:
      images: (batch_size, height, width, channels) tensor
      num_filters_out: output layer depth
      nhidden: hidden layer depth (defaults to num_filters_out)
      scope: optional scope name

    Returns:
      A (batch_size, num_filters_out) tensor
    """
    with variable_scope.variable_scope(scope, "Reduce_to_Final", [images]):
        hidden_depth = nhidden or num_filters_out
        num_images, height, width, depth = _shape(images)

        # Move height to the front so it becomes the step axis, and fold
        # the image and column axes together into a single batch axis.
        height_major = array_ops.transpose(images, [1, 0, 2, 3])
        columns = array_ops.reshape(
            height_major, [height, num_images * width, depth])

        with variable_scope.variable_scope("reduce1"):
            column_final = sequence_to_final(columns, hidden_depth)
            # Unfold the batch axis again, then put width first so it is
            # the step axis of the second pass.
            per_image = array_ops.reshape(
                column_final, [num_images, width, hidden_depth])
            width_major = array_ops.transpose(per_image, [1, 0, 2])

        with variable_scope.variable_scope("reduce2"):
            return sequence_to_final(width_major, num_filters_out)
def sequence_to_final(inputs, noutputs, scope=None, name=None, reverse=False):
    """Run an LSTM across all steps and return only the last step's output.

    Args:
      inputs: (seq_len, batch_size, depth) tensor
      noutputs: size of the output vector
      scope: optional scope name
      name: optional output tensor name
      reverse: switch to run in reverse

    Returns:
      A tensor of shape (batch_size, noutputs) holding the cell output
      produced at the last processed step.

    Raises:
      ValueError: if `inputs` contains no steps, in which case there is no
        last output to return.
    """
    with variable_scope.variable_scope(scope, "Sequence_to_Final", [inputs]):
        _, batch_size, _ = _shape(inputs)
        lstm = rnn_cell_impl.BasicLSTMCell(noutputs, state_is_tuple=False)
        state = array_ops.zeros([batch_size, lstm.state_size])
        steps = array_ops.unstack(inputs)
        if not steps:
            # Without this guard the reshape below would fail with an
            # unhelpful unbound-variable error.
            raise ValueError(
                "sequence_to_final requires at least one step in `inputs`")
        if reverse:
            steps = steps[::-1]
        for index, step in enumerate(steps):
            if index > 0:
                # The cell's variables are created on the first call; every
                # later step must share them.
                variable_scope.get_variable_scope().reuse_variables()
            output, state = lstm(step, state)
        return array_ops.reshape(output, [batch_size, noutputs], name=name)
def horizontal_lstm(images, num_filters_out, scope=None):
    """Run a forward and a reversed LSTM along the width axis of images.

    The images are converted to a width-major sequence, processed once in
    each direction, and the two results are concatenated along the last
    axis before being converted back to image layout.

    Args:
      images: (batch_size, height, width, channels) tensor
      num_filters_out: total output depth; the forward pass is given
        `num_filters_out // 2` units and the reversed pass the remainder
      scope: optional scope name

    Returns:
      The result of `sequence_to_images` applied to the concatenated
      sequences, i.e. a (batch_size, height, width, depth) tensor.
    """
    with variable_scope.variable_scope(scope, "Horizontal_LSTM", [images]):
        num_images, _, _, _ = _shape(images)
        forward_depth = num_filters_out // 2
        # Give the reversed pass whatever is left so odd depths still add
        # up to num_filters_out.
        backward_depth = num_filters_out - forward_depth

        sequence = images_to_sequence(images)
        with variable_scope.variable_scope("lr"):
            left_to_right = ndlstm_base(sequence, forward_depth)
        with variable_scope.variable_scope("rl"):
            right_to_left = ndlstm_base(sequence, backward_depth,
                                        reverse=True)

        merged = array_ops.concat([left_to_right, right_to_left], 2)
        return sequence_to_images(merged, num_images)
def ndlstm_base_dynamic(inputs, noutputs, scope=None, reverse=False):
    """Run an LSTM over a time-major sequence using `rnn.dynamic_rnn`.

    Args:
      inputs: (seq_length, batch_size, depth) tensor; seq_length must be
        statically known because it is converted with `int(...)`
      noutputs: number of units of the LSTM cell
      scope: optional scope name
      reverse: if true, the sequence is reversed along the step axis before
        the LSTM runs and the outputs are reversed back afterwards

    Returns:
      The per-step outputs returned by `rnn.dynamic_rnn` (time-major), in
      the original step order.
    """
    with variable_scope.variable_scope(scope, "Sequence_LSTM", [inputs]):
        _, batch_size, _ = _shape(inputs)
        lstm_cell = rnn_cell_impl.BasicLSTMCell(noutputs, state_is_tuple=True)
        # Use the explicitly built zero state as the initial state; it was
        # previously created and then thrown away.
        initial_state = lstm_cell.zero_state(batch_size, tf.float32)
        sequence_length = int(inputs.get_shape()[0])
        # Every batch entry is processed for the full static length.
        sequence_lengths = math_ops.to_int64(
            array_ops.fill([batch_size], sequence_length))
        if reverse:
            inputs = array_ops.reverse_v2(inputs, [0])
        outputs, _ = rnn.dynamic_rnn(lstm_cell,
                                     inputs,
                                     sequence_lengths,
                                     initial_state=initial_state,
                                     dtype=tf.float32,
                                     time_major=True)
        if reverse:
            outputs = array_ops.reverse_v2(outputs, [0])
        return outputs
def ndlstm_base_unrolled(inputs, noutput, scope=None, reverse=False):
    """Run an LSTM over a sequence by unrolling it step by step.

    Args:
      inputs: (seq_length, batch_size, depth) tensor
      noutput: number of units of the LSTM cell
      scope: optional scope name
      reverse: if true, steps are consumed last-to-first and the collected
        outputs are flipped back so they line up with the input order

    Returns:
      The per-step cell outputs stacked along a new leading axis.
    """
    with variable_scope.variable_scope(scope, "LSTM_Seq_Unrolled", [inputs]):
        _, batch_size, _ = _shape(inputs)
        cell = rnn_cell_impl.BasicLSTMCell(noutput, state_is_tuple=False)
        state = array_ops.zeros([batch_size, cell.state_size])

        steps = array_ops.unstack(inputs)
        if reverse:
            steps = steps[::-1]

        step_outputs = []
        for index, step in enumerate(steps):
            if index > 0:
                # Share the variables created by the first cell call.
                variable_scope.get_variable_scope().reuse_variables()
            step_output, state = cell(step, state)
            step_outputs.append(step_output)

        if reverse:
            step_outputs.reverse()
        return array_ops.stack(step_outputs)
def reduce_to_sequence(images, num_filters_out, scope=None):
    """Reduce an image to a sequence by scanning an LSTM over it vertically.

    Each image column is fed to `sequence_to_final` with the height axis as
    the step axis, so every column is summarised by a single vector.

    Args:
      images: (batch_size, height, width, channels)
      num_filters_out: output layer depth
      scope: optional scope name

    Returns:
      A (batch_size, width, num_filters_out) tensor. Note that the batch
      axis comes first: the result is NOT time-major.
    """
    with variable_scope.variable_scope(scope, "Reduce_to_Sequence", [images]):
        batch_size, h, w, channels = _shape(images)
        # Height first (step axis); image and column axes are folded into
        # one batch axis so all columns are processed together.
        transposed = array_ops.transpose(images, [1, 0, 2, 3])
        reshaped = array_ops.reshape(transposed,
                                     [h, batch_size * w, channels])
        reduced = sequence_to_final(reshaped, num_filters_out)
        # Unfold the combined axis back into (batch_size, width).
        output = array_ops.reshape(reduced, [batch_size, w, num_filters_out])
        return output
def sequence_softmax(inputs, noutputs, scope=None, name=None,
                     linear_name=None):
    """Apply one shared linear + softmax layer to every step of a sequence.

    Args:
      inputs: (seq_length, batch_size, depth) tensor
      noutputs: output depth
      scope: optional scope name
      name: optional name for the stacked output tensor
      linear_name: optional name for the linear (pre-softmax) output

    Returns:
      A tensor of size (seq_length, batch_size, noutputs)
    """
    _, _, ninputs = _shape(inputs)
    steps = array_ops.unstack(inputs)
    with variable_scope.variable_scope(scope, "Sequential_Softmax", [inputs]):
        weights_init = random_ops.truncated_normal([0 + ninputs, noutputs],
                                                   stddev=0.1)
        biases_init = constant_op.constant(0.1, shape=[noutputs])
        # A single weight/bias pair is created here and reused by all steps.
        weights = variables.model_variable("weights",
                                           initializer=weights_init)
        biases = variables.model_variable("biases", initializer=biases_init)

        step_probs = []
        for step in steps:
            with variable_scope.variable_scope(scope, "Sequence_Softmax_Step",
                                               [step]):
                linear = nn_ops.xw_plus_b_v1(step, weights, biases,
                                             name=linear_name)
                step_probs.append(nn_ops.softmax(linear))
        return array_ops.stack(step_probs, name=name)
def sequence_to_images(tensor, batch_size):
    """Convert a width-major sequence back into image layout.

    Args:
      tensor: (width, batch_size * height, channels) tensor
      batch_size: number of images folded into the middle axis

    Returns:
      A (batch_size, height, width, channels) tensor
    """
    width, folded, depth = _shape(tensor)
    # The middle axis holds batch_size * height rows; recover the height.
    height = folded // batch_size
    unfolded = array_ops.reshape(tensor, [width, batch_size, height, depth])
    return array_ops.transpose(unfolded, [1, 2, 0, 3])
def images_to_sequence(tensor):
    """Convert an image batch into a width-major sequence.

    Args:
      tensor: (batch_size, height, width, channels) tensor

    Returns:
      A (width, batch_size * height, channels) tensor in which every image
      row becomes one entry of the middle axis.
    """
    num_images, height, width, depth = _shape(tensor)
    width_major = array_ops.transpose(tensor, [2, 0, 1, 3])
    return array_ops.reshape(width_major, [width, num_images * height, depth])
def create_model(self, model_input, seq_len, vocab_size, target=None,
                 is_training=True, keep_prob=1.):
    """Build the conv / 2D-LSTM network and return its per-step logits.

    Args:
      model_input: input tensor; cast to float32 and reshaped to
        [FLAGS.batch_size, FLAGS.height, FLAGS.Bwidth, FLAGS.input_channels]
      seq_len: sequence lengths; cast to int32 and reshaped to
        [FLAGS.batch_size], but not otherwise used in this method
      vocab_size: size of the last axis of the returned logits
      target: unused in this method
      is_training: forwarded to batch norm and dropout
      keep_prob: stored on `self.keep_prob`

    Returns:
      A dict {"predictions": logits} where logits has been reshaped to
      [-1, batch_size, vocab_size].
    """
    images = tf.cast(model_input, tf.float32)
    lengths = tf.cast(seq_len, tf.int32)
    # NOTE(review): the reshaped lengths are never consumed below; the op
    # is kept so the constructed graph stays the same.
    lengths = tf.reshape(lengths, [FLAGS.batch_size])

    self.keep_prob = keep_prob
    self.train_b = is_training

    images = tf.reshape(images, [
        FLAGS.batch_size, FLAGS.height, FLAGS.Bwidth, FLAGS.input_channels
    ])

    bn_params = {
        'is_training': is_training,
        'decay': 0.9,
        'updates_collections': None
    }
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=bn_params):
        net = slim.conv2d(images, 16, [5, 5], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = lstm.separable_lstm(net, 2, kernel_size=(4, 3),
                                  scope='lstm2d_1')
        net = slim.fully_connected(net, 6, activation_fn=tf.nn.tanh)
        # A second conv layer (64 filters, 5x5, scope 'conv2') used to sit
        # here and is currently disabled.
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = lstm.separable_lstm(net, 124, kernel_size=None,
                                  scope='lstm2d_2')

    # Presumably (batch_size, h, w, channels) -- confirm against
    # utils._shape and lstm.separable_lstm.
    net_shape = utils._shape(net)
    batch_size = net_shape[0]
    feature_dim = net_shape[1] * net_shape[3]

    # Bring axis 2 to the front, then flatten so each row holds the
    # axis-1 x axis-3 features of one (step, image) pair.
    step_major = tf.transpose(net, [2, 0, 1, 3])
    flat = tf.reshape(step_major, [-1, feature_dim])

    with tf.name_scope('Train'):
        with tf.variable_scope('ctc_loss_1'):
            init = tf.truncated_normal_initializer(
                mean=0., stddev=0.075, seed=None, dtype=tf.float32)
            hidden_w = tf.get_variable('w', [feature_dim, 200],
                                       initializer=init)
            # NOTE(review): earlier comments said the biases are
            # zero-initialised, but both biases use the truncated-normal
            # initializer -- confirm which is intended.
            hidden_b = tf.get_variable('b', shape=[200], initializer=init)
            out_w = tf.get_variable('w1', [200, vocab_size],
                                    initializer=init)
            out_b = tf.get_variable('b1', [vocab_size], initializer=init)

            tf.summary.histogram('histogram-b-ctc', hidden_b)
            tf.summary.histogram('histogram-w-ctc', hidden_w)

            logits = tf.matmul(flat, hidden_w) + hidden_b
            # NOTE(review): no keep_prob is passed here, so slim.dropout's
            # own default applies, not the `keep_prob` argument.
            logits = slim.dropout(logits, is_training=is_training,
                                  scope='dropout')
            logits = tf.matmul(logits, out_w) + out_b
            # Undo the flattening: one leading entry per step again.
            logits = tf.reshape(logits, [-1, batch_size, vocab_size])

    return {"predictions": logits}