Example #1
File: lstm.py Project: rixingw/DocuScan
def reduce_to_final(images, num_filters_out, nhidden=None, scope=None):
    """ Reduce an image to afinal state by running two lstms

    Args:
        images: (batch_size, height, width, channels) tensor
        num_filters_out: output layer depth
        nhidden: hidden layer depth (defaults to num_filters_out)
        scope: optional scope name

    Returns:
        A (batch_size, num_filters_out) tensor
    """
    with variable_scope.variable_scope(scope, "Reduce_to_Final", [images]):
        nhidden = nhidden or num_filters_out
        batch_size, h, w, channels = _shape(images)
        transposed = array_ops.transpose(images, [1, 0, 2, 3])
        reshaped = array_ops.reshape(transposed, [h, batch_size * w, channels])
        with variable_scope.variable_scope("reduce1"):
            reduced = sequence_to_final(reshaped, nhidden)
            transposed_hidden = array_ops.reshape(reduced,
                                                  [batch_size, w, nhidden])
            hidden = array_ops.transpose(transposed_hidden, [1, 0, 2])
        with variable_scope.variable_scope("reduce2"):
            output = sequence_to_final(hidden, num_filters_out)
        return output
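A minimal usage sketch (shapes are illustrative; assumes TF 1.x graph mode with tensorflow imported as tf, and reduce_to_final defined as above):

    images = tf.random_uniform([4, 8, 6, 1])             # 4 single-channel 8x6 images
    final = reduce_to_final(images, num_filters_out=16)  # (4, 16)
    # each image collapses to one vector: first vertically ("reduce1"),
    # then horizontally ("reduce2")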
Example #2
File: lstm.py Project: rixingw/DocuScan
def sequence_to_final(inputs, noutputs, scope=None, name=None, reverse=False):
    """Run an LSTM across all steps and return only the final state.

    Args:
        inputs: (seq_len, batch_size, depth) tensor
        noutputs: size of the output vector
        scope: optional scope name
        name: optional output tensor name
        reverse: switch to run in reverse

    Returns:
        A (batch_size, noutputs) tensor
    """
    with variable_scope.variable_scope(scope, "Sequence_to_Final", [inputs]):
        seq_length, batch_size, _ = _shape(inputs)
        lstm = rnn_cell_impl.BasicLSTMCell(noutputs, state_is_tuple=False)
        state = array_ops.zeros([batch_size, lstm.state_size])
        inputs_u = array_ops.unstack(inputs)
        if reverse:
            inputs_u = list(reversed(inputs_u))

        for i in xrange(seq_length):
            if i > 0:
                variable_scope.get_variable_scope().reuse_variables()
            output, state = lstm(inputs_u[i], state)
        outputs = array_ops.reshape(output, [batch_size, noutputs], name=name)
        return outputs
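For reference, a toy call (illustrative shapes, TF 1.x assumed). Only the last step's output survives; with reverse=True the scan starts from the last element, so the "final" state corresponds to the first time step:

    seq = tf.random_uniform([10, 4, 3])          # (seq_len, batch, depth)
    final = sequence_to_final(seq, noutputs=8)   # (4, 8)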
Example #3
File: lstm.py Project: rixingw/DocuScan
def horizontal_lstm(images, num_filters_out, scope=None):
    """Run an LSTM bidirectionally over the rows of each image.

    Args:
        images: (batch_size, height, width, channels) tensor
        num_filters_out: output layer depth
        scope: optional scope name

    Returns:
        A (batch_size, height, width, num_filters_out) tensor
    """
    with variable_scope.variable_scope(scope, "Horizontal_LSTM", [images]):
        batch_size, _, _, _ = _shape(images)
        sequence = images_to_sequence(images)
        with variable_scope.variable_scope("lr"):
            hidden_sequence_lr = ndlstm_base(sequence, num_filters_out // 2)
        with variable_scope.variable_scope("rl"):
            hidden_sequence_rl = ndlstm_base(sequence,
                                             num_filters_out -
                                             num_filters_out // 2,
                                             reverse=True)

        output_sequence = array_ops.concat(
            [hidden_sequence_lr, hidden_sequence_rl], 2)
        output = sequence_to_images(output_sequence, batch_size)
    return output
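Note how the depth budget is split: the left-to-right pass gets num_filters_out // 2 units and the right-to-left pass gets the remainder, so the concatenated depth is exactly num_filters_out even when it is odd. A sketch (hypothetical shapes):

    images = tf.random_uniform([2, 8, 6, 3])
    out = horizontal_lstm(images, num_filters_out=15)
    # lr contributes 7 channels, rl contributes 8; out: (2, 8, 6, 15)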
Example #4
File: lstm.py Project: rixingw/DocuScan
def ndlstm_base_dynamic(inputs, noutputs, scope=None, reverse=False):
    """Run an LSTM across a sequence with dynamic_rnn, optionally in reverse.

    Args:
        inputs: (seq_len, batch_size, depth) tensor
        noutputs: output depth
        scope: optional scope name
        reverse: switch to run in reverse

    Returns:
        A (seq_len, batch_size, noutputs) tensor
    """
    with variable_scope.variable_scope(scope, "Sequence_LSTM", [inputs]):
        _, batch_size, _ = _shape(inputs)
        lstm_cell = rnn_cell_impl.BasicLSTMCell(noutputs, state_is_tuple=True)
        lstm_cell.zero_state(batch_size, tf.float32)  # result unused; dynamic_rnn builds its own zero state below
        sequence_length = int(inputs.get_shape()[0])
        sequence_lengths = math_ops.to_int64(
            array_ops.fill([batch_size], sequence_length))
        if reverse:
            inputs = array_ops.reverse_v2(inputs, [0])
        outputs, _ = rnn.dynamic_rnn(lstm_cell,
                                     inputs,
                                     sequence_lengths,
                                     dtype=tf.float32,
                                     time_major=True)
        if reverse:
            outputs = array_ops.reverse_v2(outputs, [0])
        return outputs
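Here reverse is implemented by flipping the time axis before and after dynamic_rnn; because sequence_lengths is filled with the full static length for every batch element, this is equivalent to a true backward scan. A sketch (illustrative shapes; distinct scopes give the two passes independent weights):

    seq = tf.random_uniform([10, 4, 3])
    fwd = ndlstm_base_dynamic(seq, 8, scope="fwd")                # (10, 4, 8)
    bwd = ndlstm_base_dynamic(seq, 8, scope="bwd", reverse=True)  # (10, 4, 8), scanned right to left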
Example #5
File: lstm.py Project: rixingw/DocuScan
def ndlstm_base_unrolled(inputs, noutput, scope=None, reverse=False):
    """Run an LSTM across a sequence by explicit unrolling, optionally in reverse.

    Args:
        inputs: (seq_len, batch_size, depth) tensor
        noutput: output depth
        scope: optional scope name
        reverse: switch to run in reverse

    Returns:
        A (seq_len, batch_size, noutput) tensor
    """
    with variable_scope.variable_scope(scope, "LSTM_Seq_Unrolled", [inputs]):
        length, batch_size, _ = _shape(inputs)
        lstm_cell = rnn_cell_impl.BasicLSTMCell(noutput, state_is_tuple=False)
        state = array_ops.zeros([batch_size, lstm_cell.state_size])
        output_u = []
        inputs_u = array_ops.unstack(inputs)
        if reverse:
            inputs_u = list(reversed(inputs_u))
        for i in xrange(length):
            if i > 0:
                variable_scope.get_variable_scope().reuse_variables()
            output, state = lstm_cell(inputs_u[i], state)
            output_u += [output]
        if reverse:
            output_u = list(reversed(output_u))
        outputs = array_ops.stack(output_u)
        return outputs
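This unrolled variant applies the cell once per step, sharing a single weight set via reuse_variables(); it builds the same recurrence as ndlstm_base_dynamic above but requires a static sequence length and produces a larger graph. A sketch (the two calls hold separate weights because they live in different scopes):

    seq = tf.random_uniform([5, 2, 3])
    a = ndlstm_base_unrolled(seq, 4, scope="unrolled")  # (5, 2, 4)
    b = ndlstm_base_dynamic(seq, 4, scope="dynamic")    # (5, 2, 4)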
Example #6
File: lstm.py Project: rixingw/DocuScan
def reduce_to_sequence(images, num_filters_out, scope=None):
    """Reduce an image to a sequence by scanning an LSTM over it vertically

    Args:
        images: (batch_size, height, width, channels)
        num_filters_out: output layer depth
        scope: optional scope name

    Returns:
        A (batch_size, width, num_filters_out) sequence

    """
    with variable_scope.variable_scope(scope, "Reduce_to_Sequence", [images]):
        batch_size, h, w, channels = _shape(images)
        transposed = array_ops.transpose(images, [1, 0, 2, 3])
        reshaped = array_ops.reshape(transposed, [h, batch_size * w, channels])
        reduced = sequence_to_final(reshaped, num_filters_out)
        output = array_ops.reshape(reduced, [batch_size, w, num_filters_out])
        return output
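A usage sketch (hypothetical shapes): each image column is collapsed vertically into a single vector, yielding one output step per column:

    images = tf.random_uniform([4, 8, 6, 1])
    seq = reduce_to_sequence(images, num_filters_out=16)  # (4, 6, 16)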
Example #7
File: lstm.py Project: rixingw/DocuScan
def sequence_softmax(inputs,
                     noutputs,
                     scope=None,
                     name=None,
                     linear_name=None):
    """Run a softmax layer over all time_steps of an input sequence

    Args:
        inputs: (seq_length, batch_size, depth) tensor
        noutputs: output_depth
        scope: optional scope name
        name: optional name for output tensor
        linear_name: optional name for linear (pre-softmax) output

    Returns:
        A tensor of size (seq_length, batch_size, noutputs)
    """
    seq_length, _, ninputs = _shape(inputs)
    inputs_u = array_ops.unstack(inputs)
    outputs_u = []
    with variable_scope.variable_scope(scope, "Sequential_Softmax", [inputs]):
        initial_w = random_ops.truncated_normal([ninputs, noutputs],
                                                stddev=0.1)
        initial_b = constant_op.constant(0.1, shape=[noutputs])
        w = variables.model_variable("weights", initializer=initial_w)
        b = variables.model_variable("biases", initializer=initial_b)
        for i in xrange(seq_length):
            with variable_scope.variable_scope(scope, "Sequence_Softmax_Step",
                                               [inputs_u[i]]):
                linear = nn_ops.xw_plus_b_v1(inputs_u[i],
                                             w,
                                             b,
                                             name=linear_name)
                output = nn_ops.softmax(linear)
                outputs_u += [output]
        outputs = array_ops.stack(outputs_u, name=name)
    return outputs
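Because w and b are created once outside the loop and reused at every step, this amounts to a time-distributed dense layer followed by a softmax. A sketch (illustrative shapes):

    logits_seq = sequence_softmax(tf.random_uniform([10, 4, 32]), noutputs=5)
    # (10, 4, 5); entries along the last axis sum to 1 at every step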
Example #8
File: lstm.py Project: rixingw/DocuScan
def sequence_to_images(tensor, batch_size):
    """Convert a (width, batch_size * height, channels) sequence back into
    a (batch_size, height, width, channels) image batch."""
    w, seq_length, channels = _shape(tensor)
    h = seq_length // batch_size
    reshaped = array_ops.reshape(tensor, [w, batch_size, h, channels])
    return array_ops.transpose(reshaped, [1, 2, 0, 3])
Example #9
File: lstm.py Project: rixingw/DocuScan
def images_to_sequence(tensor):
    """Convert a (batch_size, height, width, channels) image batch into a
    (width, batch_size * height, channels) sequence, one step per column."""
    batch_size, h, w, channels = _shape(tensor)
    transposed = array_ops.transpose(tensor, [2, 0, 1, 3])
    return array_ops.reshape(transposed, [w, batch_size * h, channels])
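The two helpers above are inverses for a fixed batch_size, so a round trip restores the original layout. A sketch:

    images = tf.random_uniform([2, 8, 6, 3])
    seq = images_to_sequence(images)                  # (6, 16, 3): one step per column
    restored = sequence_to_images(seq, batch_size=2)  # (2, 8, 6, 3)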
Example #10
File: model.py Project: rixingw/DocuScan
    def create_model(self,
                     model_input,
                     seq_len,
                     vocab_size,
                     target=None,
                     is_training=True,
                     keep_prob=1.):
        """Build the conv + separable-LSTM + CTC head; returns time-major
        logits of shape (width_steps, batch_size, vocab_size)."""
        imageInputs1 = tf.cast(model_input, tf.float32)
        seq_lens = tf.cast(seq_len, tf.int32)
        seq_lens1 = tf.reshape(seq_lens, [FLAGS.batch_size])
        self.keep_prob = keep_prob
        self.train_b = is_training

        imageInputs2 = tf.reshape(imageInputs1, [
            FLAGS.batch_size, FLAGS.height, FLAGS.Bwidth, FLAGS.input_channels
        ])

        batch_norm_params = {
            'is_training': is_training,
            'decay': 0.9,
            'updates_collections': None
        }
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params):
            x = imageInputs2
            net = slim.conv2d(x, 16, [5, 5], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = lstm.separable_lstm(net,
                                      2,
                                      kernel_size=(4, 3),
                                      scope='lstm2d_1')
            net = slim.fully_connected(net, 6, activation_fn=tf.nn.tanh)
            #net = slim.conv2d(net, 64, [5, 5], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = lstm.separable_lstm(net,
                                      124,
                                      kernel_size=None,
                                      scope='lstm2d_2')

        shape = utils._shape(net)
        batch_size = shape[0]
        # net is (batch_size, h, w, channels); transpose to time-major
        # (w, batch_size, h, channels), then flatten each step for the
        # dense layers below
        outputs = tf.transpose(net, [2, 0, 1, 3])
        outputs = tf.reshape(outputs, [-1, shape[1] * shape[3]])

        with tf.name_scope('Train'):
            with tf.variable_scope('ctc_loss_1') as scope:
                myInitializer = tf.truncated_normal_initializer(
                    mean=0., stddev=0.075, seed=None, dtype=tf.float32)

                W = tf.get_variable('w', [shape[1] * shape[3], 200],
                                    initializer=myInitializer)
                # note: biases use the same truncated-normal initializer as
                # the weights, not zeros
                b = tf.get_variable('b',
                                    shape=[200],
                                    initializer=myInitializer)

                W1 = tf.get_variable('w1', [200, vocab_size],
                                     initializer=myInitializer)
                # likewise truncated-normal initialized, not zero
                b1 = tf.get_variable('b1', [vocab_size],
                                     initializer=myInitializer)
            tf.summary.histogram('histogram-b-ctc', b)
            tf.summary.histogram('histogram-w-ctc', W)

        logits = tf.matmul(outputs, W) + b
        logits = slim.dropout(logits, is_training=is_training, scope='dropout')
        logits = tf.matmul(logits, W1) + b1

        # reshape to time-major (width_steps, batch_size, vocab_size) for CTC
        logits = tf.reshape(logits, [-1, batch_size, vocab_size])
        return {"predictions": logits}