Exemple #1
0
def simple_toy_example_2():
	"""Run warp-ctc on a tiny hand-written batch and print the resulting costs.

	The batch holds 3 sequences of 2 time steps over an alphabet of 5
	classes, with class 0 used as the blank label.
	"""
	blank_label = 0

	# Written down sequence by sequence: (batches, time-steps, features).
	batch_major_probs = np.array([
		[[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]],
		[[0.1, 0.1, 0.1, 0.6, 0.1], [0.1, 0.1, 0.1, 0.1, 0.6]],
		[[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.6, 0.1]],
	])
	# Swap the first two axes: (batches, time-steps, features) -> (time-steps, batches, features).
	probs = np.swapaxes(batch_major_probs, 0, 1)
	prob_lens = np.array([2, 2, 2])

	# The targets of all sequences are concatenated into one flat vector;
	# a 2-D array is rejected with "flat_labels is not a vector".
	labels = np.array([1, 2, 3, 4, 1, 3])
	label_lens = np.array([2, 2, 2])

	ctc_costs = warpctc_tensorflow.ctc(probs, labels, label_lens, prob_lens, blank_label=blank_label)

	with tf.Session() as sess:
		costs = sess.run(ctc_costs)

	print('CTC costs =', costs)
Exemple #2
0
def simple_toy_example_1():
	"""Evaluate warp-ctc on a fixed 5-step, 2-sequence batch and print the costs.

	The hand-written rows are values over an alphabet of 6 classes; their
	element-wise logarithm is what gets passed to warp-ctc. Class 5 is used
	as the blank label.
	"""
	blank_label = 5

	# Indexed as [time-step][sequence][class]: 5 x 2 x 6.
	probabilities = np.array([
		[
			[0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553],
			[0.30176, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508],
		],
		[
			[0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436],
			[0.24082, 0.397533, 0.0557226, 0.0546814, 0.0557528, 0.19549],
		],
		[
			[0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688],
			[0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, 0.202456],
		],
		[
			[0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533],
			[0.280884, 0.429522, 0.0326593, 0.0339046, 0.0326856, 0.190345],
		],
		[
			[0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107],
			[0.423286, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046],
		],
	])
	activations = np.log(probabilities)
	activation_lens = np.array([5, 5])

	# Targets of both sequences, concatenated into one flat vector.
	first_target = [0, 1, 2, 1, 0]
	second_target = [0, 1, 1, 0]
	labels = np.array(first_target + second_target)
	label_lens = np.array([len(first_target), len(second_target)])

	# Reference values noted by the original author: [3.3421143650988143, 5.42262].
	ctc_costs = warpctc_tensorflow.ctc(activations, labels, label_lens, activation_lens, blank_label=blank_label)

	with tf.Session() as sess:
		costs = sess.run(ctc_costs)

	print('CTC costs =', costs)
Exemple #3
0
 def __init__(self, input_dim=128, output_dim=104, learning_rate=0.001):
     """Build the recurrent Keras model together with its CTC training graph.

     The network is: batch normalisation -> bidirectional GRU (outputs
     summed) -> bidirectional GRU (outputs concatenated) -> per-time-step
     linear Dense layer with `output_dim` units. On top of it this
     constructor creates the warp-ctc loss, an SGD optimizer and the
     backend functions `train_step` and `predict_step`.

     Args:
         input_dim: Size of the last axis of the input (feature size).
         output_dim: Number of units of the final Dense layer.
         learning_rate: Learning rate handed to the SGD optimizer.
     """
     super(RNN, self).__init__()
     self.input_dim = input_dim
     self.output_dim = output_dim
     # The `None` axis in the shape leaves the sequence length unspecified.
     self.inp = Input(shape=(None, self.input_dim), name="Input")
     self.batch_norm = keras.layers.normalization.BatchNormalization()(
         self.inp)
     # Earlier hand-built bidirectional variant, kept for reference:
     # self.gru_1 = GRU(256, return_sequences=True, kernel_initializer='he_normal', name='gru1')(self.batch_norm)
     # self.gru_1b = GRU(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(self.inp)
     # self.gru1_merged = add([self.gru_1, self.gru_1b])
     # self.gru_2 = GRU(256, return_sequences=True, kernel_initializer='he_normal', name='gru2')(self.gru1_merged)
     # self.gru_2b = GRU(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(self.gru1_merged)
     # First recurrent layer: forward and backward outputs are summed.
     self.gru_1 = Bidirectional(GRU(256,
                                    return_sequences=True,
                                    kernel_initializer='he_normal',
                                    name='gru1'),
                                merge_mode="sum")(self.batch_norm)
     # Second recurrent layer: forward and backward outputs are concatenated.
     self.gru_2 = Bidirectional(GRU(256,
                                    return_sequences=True,
                                    kernel_initializer='he_normal',
                                    name='gru2'),
                                merge_mode="concat")(self.gru_1)
     # Linear activation: the model emits unnormalised scores; softmax is
     # only applied explicitly for decoding further below.
     self.y_pred = TimeDistributed(
         Dense(self.output_dim,
               kernel_initializer='he_normal',
               name='dense2',
               activation='linear'))(self.gru_2)
     self.model = Model(inputs=self.inp, outputs=self.y_pred)
     self.model.summary()
     # 1-D int32 placeholders for the labels and for the per-sequence lengths.
     self.y_true = K.placeholder(name='y_true', ndim=1, dtype='int32')
     self.input_length = K.placeholder(name='input_length',
                                       ndim=1,
                                       dtype='int32')
     self.label_length = K.placeholder(name='label_length',
                                       ndim=1,
                                       dtype='int32')
     # The first two axes of y_pred are swapped before it is passed to
     # warp-ctc; the returned costs are averaged.
     self.loss_out = K.mean(
         warpctc_tensorflow.ctc(tf.transpose(self.y_pred,
                                             perm=[1, 0, 2]), self.y_true,
                                self.label_length, self.input_length))
     # self.ctc_loss = K.function([self.y_true, self.y_pred, self.input_length, self.label_length, K.learning_phase()], \
     #                 [self.loss_out])
     # self.optimizer = keras.optimizers.Adam(lr = learning_rate)
     self.optimizer = keras.optimizers.SGD(lr=learning_rate,
                                           decay=1e-6,
                                           momentum=0.9,
                                           nesterov=True,
                                           clipnorm=200)
     # NOTE(review): only the optimizer's updates are collected here; confirm
     # whether the BatchNormalization layer's own updates must be added too.
     self.update = self.optimizer.get_updates(self.model.trainable_weights,
                                              [],
                                              loss=self.loss_out)
     # Decode from softmax-normalised predictions and keep the first decoded
     # result (the third positional argument is presumably `greedy` - confirm
     # against the installed Keras version).
     self.network_output = K.ctc_decode(
         Activation('softmax')(self.y_pred), self.input_length, True)[0][0]
     # One call to train_step returns [loss, y_pred] and applies self.update.
     self.train_step = K.function([self.inp, self.y_true, self.input_length, self.label_length, K.learning_phase()], \
          [self.loss_out, self.y_pred], updates = self.update)
     # Index of the highest-scoring unit along the last axis of y_pred.
     self.test = K.argmax(self.y_pred, axis=2)
     self.predict_step = K.function(
         [self.inp, self.input_length,
          K.learning_phase()], [self.network_output])
Exemple #4
0
    def cost(self) -> tf.Tensor:
        """Return the warp-ctc loss summed over everything the op returns.

        The blank label index is the size of the vocabulary.
        """
        per_sequence_loss = warpctc.ctc(
            activations=self.logits,
            flat_labels=self.flat_labels,
            label_lengths=self.label_lengths,
            input_lengths=self.encoder.lengths,
            blank_label=len(self.vocabulary),
        )
        summed_loss = tf.reduce_sum(per_sequence_loss)
        return summed_loss
Exemple #5
0
def loss(logits, seq_length, labels, label_length, blank_label=28):
    """Calculate the network's CTC loss.

    Depending on `FLAGS.use_warp_ctc` the loss is computed either with
    Baidu's warp-ctc or with `tf.nn.ctc_loss`.

    Args:
        logits (tf.Tensor):
            3D float Tensor holding the logits. `tf.nn.ctc_loss` is called with
            `time_major=True`, i.e. the Tensor is expected to be shaped
            [max_time, batch_size, num_classes].

        seq_length (tf.Tensor):
            1D int32 vector, size [batch_size]. The sequence lengths.

        labels (tf.SparseTensor or tf.Tensor):
            If `FLAGS.use_warp_ctc` is false: an int32 SparseTensor.
            labels.indices[i, :] == [b, t] means labels.values[i] stores the id for
            (batch b, time t). labels.values[i] must take on values in [0, num_labels).
            Else: an int32 dense Tensor version of the above. Entries equal to 0 are
            treated as padding and removed before the loss is computed.

        label_length (tf.Tensor):
            Tensor with the length of each label within the batch. It is flattened to
            a 1D vector before use. Only used when `FLAGS.use_warp_ctc` is true.

        blank_label (int):
            Index of the CTC blank class handed to warp-ctc. Only used when
            `FLAGS.use_warp_ctc` is true. Defaults to 28, the value that was
            previously hard-coded.

    Returns:
        tf.Tensor:
            Scalar float Tensor containing the mean of the CTC loss values.
    """
    if FLAGS.use_warp_ctc:
        # Labels need to be a 1D vector, with every label concatenated.
        flat_labels = tf.reshape(labels, [-1])

        # Remove padding from labels: partition 0 receives the non-zero entries,
        # partition 1 the zero (padding) entries, which are discarded.
        partitions = tf.cast(tf.equal(flat_labels, 0), tf.int32)
        flat_labels, _ = tf.dynamic_partition(flat_labels, partitions, 2)

        # `label_length` needs to be a 1D vector.
        flat_label_length = tf.reshape(label_length, [-1])

        # https://github.com/baidu-research/warp-ctc
        total_loss = warp_ctc.ctc(activations=logits,
                                  flat_labels=flat_labels,
                                  label_lengths=flat_label_length,
                                  input_lengths=seq_length,
                                  blank_label=blank_label)

        # total_loss = tf.Print(total_loss, [total_loss], message='total_loss ')

    else:
        # https://www.tensorflow.org/api_docs/python/tf/nn/ctc_loss
        total_loss = tf.nn.ctc_loss(labels=labels,
                                    inputs=logits,
                                    sequence_length=seq_length,
                                    preprocess_collapse_repeated=False,
                                    ctc_merge_repeated=True,
                                    time_major=True)

    # Return average CTC loss.
    return tf.reduce_mean(total_loss)
Exemple #6
0
 def createCtcCriterion(self):
     """Attach the warp-ctc loss (`self.loss`) and its mean (`self.cost`)."""
     # Baidu's warp-ctc is used here; the built-in alternative would be
     # tf.nn.ctc_loss(self.target, self.result, self.lossSeqLengths).
     per_sequence_loss = warpctc_tensorflow.ctc(
         self.result,
         self.lossTarget,
         self.targetSeqLengths,
         self.inputSeqLengths,
         blank_label=36,
     )
     self.loss = per_sequence_loss
     self.cost = tf.reduce_mean(per_sequence_loss)
Exemple #7
0
 def _setup_loss(self, logits):
     """
         Build and return the mean warp-ctc loss inside the "loss" name scope.

         Note that the `logits` argument is not read: the loss is computed
         from `self.logits`.
     """
     with tf.name_scope("loss"):
         # The input lengths are divided by the shrink factor before they
         # are handed to warp-ctc.
         shrunk_lengths = tf.div(self.X_batch_len, self.shrink_factor)
         per_sequence_loss = warpctc_tensorflow.ctc(
             self.logits,
             self.Y_batch.values,
             self.Y_batch_len,
             shrunk_lengths,
             blank_label=8,
         )
         mean_loss = tf.reduce_mean(per_sequence_loss)
         return mean_loss
Exemple #8
0
    def _run_ctc(self,
                 activations,
                 input_lengths,
                 flat_labels,
                 label_lengths,
                 expected_costs,
                 expected_gradients,
                 use_gpu=False,
                 expected_error=None):
        """Run the ctc op and compare costs and gradients with expectations.

        Args:
            activations: Array fed to the op as `activations`; must have the
                same shape as `expected_gradients`.
            input_lengths: Values fed to the op as `input_lengths`.
            flat_labels: Values fed to the op as `flat_labels`.
            label_lengths: Values fed to the op as `label_lengths`.
            expected_costs: Expected costs (compared with atol=1e-6).
            expected_gradients: Expected gradient of the costs with respect
                to the activations (compared with atol=1e-6).
            use_gpu: Whether the session should use (and be forced onto) the GPU.
            expected_error: If not None, running the op is expected to raise
                an op error matching this value instead of producing results.
        """
        # `assertEquals` is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(activations.shape, expected_gradients.shape)
        activations_t = tf.constant(activations)
        input_lengths_t = tf.constant(input_lengths)
        flat_labels_t = tf.constant(flat_labels)
        label_lengths_t = tf.constant(label_lengths)
        costs = ctc(activations=activations_t,
                    flat_labels=flat_labels_t,
                    label_lengths=label_lengths_t,
                    input_lengths=input_lengths_t)

        grad = tf.gradients(costs, [activations_t])[0]

        # Static shapes are checked before anything is executed.
        self.assertShapeEqual(expected_costs, costs)

        self.assertShapeEqual(expected_gradients, grad)

        log_dev_placement = False
        if not use_gpu:
            # Note: using use_gpu=False seems to not work
            # it runs the GPU version instead
            # so the GPU device count is set to zero explicitly.
            config = tf.ConfigProto(log_device_placement=log_dev_placement,
                                    device_count={'GPU': 0})
        else:
            config = tf.ConfigProto(log_device_placement=log_dev_placement,
                                    allow_soft_placement=False)

        with self.test_session(use_gpu=use_gpu,
                               force_gpu=use_gpu,
                               config=config) as sess:
            if expected_error is None:
                (tf_costs, tf_grad) = sess.run([costs, grad])
                self.assertAllClose(tf_costs, expected_costs, atol=1e-6)
                self.assertAllClose(tf_grad, expected_gradients, atol=1e-6)
            else:
                # A single run is enough: a statement placed after a raising
                # call inside this context would never execute.
                with self.assertRaisesOpError(expected_error):
                    sess.run([costs, grad])
Exemple #9
0
    def ctc_loss(self, logits, len_logits, labels, len_labels):
        """
        Build the CTC loss, optionally extended by a policy-learning term.

        Depending on `self.args.model.use_wrapctc` the loss comes either from
        `warpctc_tensorflow.ctc` (fed with transposed logits and flattened
        labels) or from `tf.nn.ctc_loss` (fed with `time_major=False` logits
        and sparse labels). When `self.args.model.policy_learning` is truthy,
        the policy-learning loss scaled by that value is added.

        No valid path found: It is possible that no valid path is found if the
        activations for the targets are zero.
        """
        with tf.name_scope("ctc_loss"):
            if not self.args.model.use_wrapctc:
                # with tf.get_default_graph()._kernel_label_map({"CTCLoss": "WarpCTC"}):
                sparse_targets = dense_sequence_to_sparse(labels, len_labels)
                ctc_loss = tf.nn.ctc_loss(
                    sparse_targets,
                    logits,
                    sequence_length=len_logits,
                    ctc_merge_repeated=self.ctc_merge_repeated,
                    ignore_longer_outputs_than_inputs=True,
                    time_major=False)
            else:
                import warpctc_tensorflow
                from tfTools.tfTools import get_indices

                # Gather the label entries selected by the indices derived
                # from the label lengths into one flat vector.
                label_positions = get_indices(len_labels)
                flat_targets = tf.gather_nd(labels, label_positions)
                # Swap the first two axes of the logits for warp-ctc.
                swapped_logits = tf.transpose(logits, [1, 0, 2])
                ctc_loss = warpctc_tensorflow.ctc(
                    activations=swapped_logits,
                    flat_labels=flat_targets,
                    label_lengths=len_labels,
                    input_lengths=len_logits,
                    blank_label=self.args.dim_output)

        if not self.args.model.policy_learning:
            return ctc_loss

        from tfModels.regularization import policy_learning

        temperature = self.model.decoder.softmax_temperature
        num_outputs = self.dim_output
        decoded = self.ctc_decode(logits, len_logits)
        policy_loss = policy_learning(logits, len_logits, decoded,
                                      labels, len_labels, temperature,
                                      num_outputs, self.args)
        ctc_loss += self.args.model.policy_learning * policy_loss

        return ctc_loss
    def _run_ctc(self, activations, input_lengths,
                 flat_labels, label_lengths,
                 expected_costs, expected_gradients,
                 use_gpu=False, expected_error=None):
        """Run the ctc op and compare costs and gradients with expectations.

        Args:
            activations: Array fed to the op as `activations`; must have the
                same shape as `expected_gradients`.
            input_lengths: Values fed to the op as `input_lengths`.
            flat_labels: Values fed to the op as `flat_labels`.
            label_lengths: Values fed to the op as `label_lengths`.
            expected_costs: Expected costs (compared with atol=1e-6).
            expected_gradients: Expected gradient of the costs with respect
                to the activations (compared with atol=1e-6).
            use_gpu: Whether the session should use (and be forced onto) the GPU.
            expected_error: If not None, running the op is expected to raise
                an op error matching this value instead of producing results.
        """
        # `assertEquals` is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(activations.shape, expected_gradients.shape)
        activations_t = tf.constant(activations)
        input_lengths_t = tf.constant(input_lengths)
        flat_labels_t = tf.constant(flat_labels)
        label_lengths_t = tf.constant(label_lengths)
        costs = ctc(activations=activations_t,
                    flat_labels=flat_labels_t,
                    label_lengths=label_lengths_t,
                    input_lengths=input_lengths_t)

        grad = tf.gradients(costs, [activations_t])[0]

        # Static shapes are checked before anything is executed.
        self.assertShapeEqual(expected_costs, costs)

        self.assertShapeEqual(expected_gradients, grad)

        log_dev_placement = False
        if not use_gpu:
            # Note: using use_gpu=False seems to not work
            # it runs the GPU version instead
            # so the GPU device count is set to zero explicitly.
            config = tf.ConfigProto(log_device_placement=log_dev_placement,
                                    device_count={'GPU': 0})
        else:
            config = tf.ConfigProto(log_device_placement=log_dev_placement,
                                    allow_soft_placement=False)

        with self.test_session(use_gpu=use_gpu, force_gpu=use_gpu, config=config) as sess:
            if expected_error is None:
                (tf_costs, tf_grad) = sess.run([costs, grad])
                self.assertAllClose(tf_costs, expected_costs, atol=1e-6)
                self.assertAllClose(tf_grad, expected_gradients, atol=1e-6)
            else:
                # A single run is enough: a statement placed after a raising
                # call inside this context would never execute.
                with self.assertRaisesOpError(expected_error):
                    sess.run([costs, grad])
    def build_loss(self):
        """Build the training loss and the densely decoded predictions.

        Returns:
            A tuple `(loss, dense_decoded)`: the mean warp-ctc loss (plus the
            collected regularization losses when `cfg.TRAIN.WEIGHT_DECAY` is
            positive) and the first beam-search result converted to a dense
            int32 tensor with 0 as fill value.
        """
        sequence_lengths = self.get_output('time_step_len')
        logits = self.get_output('logits')
        flat_targets = self.get_output('labels')
        target_lengths = self.get_output('labels_len')

        per_sequence_loss = warpctc_tensorflow.ctc(
            activations=logits,
            flat_labels=flat_targets,
            label_lengths=target_lengths,
            input_lengths=sequence_lengths,
        )
        loss = tf.reduce_mean(per_sequence_loss)

        # Only the decoded paths are used; the log probabilities are dropped.
        decoded_paths, _ = tf.nn.ctc_beam_search_decoder(
            logits, sequence_lengths, merge_repeated=True)
        best_path_dense = tf.sparse_tensor_to_dense(decoded_paths[0],
                                                    default_value=0)
        dense_decoded = tf.cast(best_path_dense, tf.int32)

        # add regularizer
        if cfg.TRAIN.WEIGHT_DECAY > 0:
            weight_penalties = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES)
            loss = tf.add_n(weight_penalties) + loss

        return loss, dense_decoded
    def build_model(self):
        """Build the stacked bidirectional CudnnLSTM network with a warp-ctc loss.

        Creates the global step/epoch counters, pulls the input tensors from
        `self.data_loader`, stacks `self.rnn_num_layers` bidirectional
        CudnnLSTM layers (with dropout between layers), projects every time
        step onto `self.data_loader.num_classes` logits and wires up the CTC
        loss, beam-search decoding, the character error rate and the RMSProp
        training op. The input tensors and the training tensors are also
        registered in the 'inputs' and 'train' graph collections.
        """
        # Helper Variables
        self.global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
        self.global_step_inc = self.global_step_tensor.assign(self.global_step_tensor + 1)
        self.global_epoch_tensor = tf.Variable(0, trainable=False, name='global_epoch')
        self.global_epoch_inc = self.global_epoch_tensor.assign(self.global_epoch_tensor + 1)

        # Inputs to the network
        with tf.variable_scope('inputs'):
            self.x, y, self.length, self.lab_length = self.data_loader.get_input()
            # Dense labels use -1 as the end/padding token.
            self.y = tf.contrib.layers.dense_to_sparse(y, eos_token=-1)
            # Move the last axis to the front; presumably this turns
            # [batch, features, time] into the time-major [time, batch, features]
            # layout - TODO confirm against the data loader.
            self.x = tf.transpose(self.x, [2, 0, 1])
            self.is_training = tf.placeholder(tf.bool, name='Training_flag')
        tf.add_to_collection('inputs', self.x)
        tf.add_to_collection('inputs', self.length)
        tf.add_to_collection('inputs', self.lab_length)
        tf.add_to_collection('inputs', y)
        tf.add_to_collection('inputs', self.is_training)

        # Network Architecture
        out_W = tf.Variable(tf.truncated_normal([2 * self.rnn_num_hidden, self.data_loader.num_classes], stddev=0.1),
                            name='out_W')
        out_b = tf.Variable(tf.constant(0., shape=[self.data_loader.num_classes]), name='out_b')

        # RNN
        output = self.x
        with tf.variable_scope('MultiRNN', reuse=tf.AUTO_REUSE):
            for i in range(self.rnn_num_layers):
                lstm = tf.contrib.cudnn_rnn.CudnnLSTM(1, self.rnn_num_hidden, 'linear_input', 'bidirectional')
                output, state = lstm(output)
                # Dropout between layers only; the noise shape has size 1 on
                # axis 0, so one mask is shared along that axis.
                if i < self.rnn_num_layers - 1:
                    output = tf.layers.dropout(output, self.rnn_dropout, noise_shape=tf.constant(
                        value=[1, self.config.batch_size, 2 * self.rnn_num_hidden]), training=self.is_training)


        # Fully Connected
        with tf.name_scope('Dense'):
            output = tf.concat(output, 2)
            # Reshaping to apply the same weights over the timesteps
            output = tf.reshape(output, [-1, 2*self.rnn_num_hidden])
            # Doing the affine projection
            logits = tf.matmul(output, out_W) + out_b

        # Reshaping back to the original shape. The recurrent output has the
        # batch on axis 1 (see the dropout noise_shape above), so the flattened
        # rows are ordered axis-0-first. Restoring [-1, batch, classes]
        # therefore recovers the layout directly; reshaping to
        # [batch, -1, classes] followed by a transpose would mix values that
        # belong to different sequences and time steps.
        self.logits = tf.reshape(logits, [-1, self.config.batch_size, self.data_loader.num_classes])

        with tf.variable_scope('loss-acc'):
            # The last class index is used as the blank label.
            self.loss = warpctc_tensorflow.ctc(self.logits, self.y.values, self.lab_length, self.length,
                                               self.data_loader.num_classes - 1)
            self.cost = tf.reduce_mean(self.loss)
            self.prediction = tf.nn.ctc_beam_search_decoder(self.logits, sequence_length=self.length,
                                                            merge_repeated=False)
            self.cer = self.calc_cer(self.prediction[0][0], self.y)

        with tf.variable_scope('train_step'):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                # NOTE(review): the optimizer is given `self.loss` (the output
                # of the ctc op), not the mean `self.cost` - confirm this is
                # intended.
                self.train_step = tf.train.RMSPropOptimizer(learning_rate=self.config.learning_rate).minimize(
                    self.loss, global_step=self.global_step_tensor)

        tf.add_to_collection('train', self.train_step)
        tf.add_to_collection('train', self.cost)
        tf.add_to_collection('train', self.cer)
Exemple #13
0
    def __init__(self, learning_rate=0.001):
        """Build the convolutional-recurrent Keras model and its CTC training graph.

        The network is: two Conv2D + MaxPooling2D stages -> `self.res`
        (wrapped in a Lambda layer) -> Dense -> batch normalisation ->
        bidirectional GRU (outputs summed) -> bidirectional GRU (outputs
        concatenated) -> per-time-step linear Dense layer with 63 units. On
        top of it this constructor creates the warp-ctc loss, an SGD optimizer
        and the backend functions `out`, `train_step` and `predict_step`.

        Args:
            learning_rate: Learning rate handed to the SGD optimizer.
        """
        # Fixed hyper-parameters of the network.
        conv_filters = 16
        kernel_size = (3, 3)
        pool_size = 2
        time_dense_size = 32
        rnn_size = 512
        img_h = 32
        act = 'relu'

        # Scalar int32 placeholder; a quarter of it is forwarded to self.res.
        self.width = K.placeholder(name='width', ndim=0, dtype='int32')
        # The first (None) axis of the shape leaves that dimension unspecified.
        self.input_data = Input(name='the_input',
                                shape=(None, img_h, 1),
                                dtype='float32')
        self.inner = Conv2D(conv_filters,
                            kernel_size,
                            padding='same',
                            activation=act,
                            kernel_initializer='he_normal',
                            name='conv1')(self.input_data)
        self.inner = MaxPooling2D(pool_size=(pool_size, pool_size),
                                  name='max1')(self.inner)
        self.inner = Conv2D(conv_filters,
                            kernel_size,
                            padding='same',
                            activation=act,
                            kernel_initializer='he_normal',
                            name='conv2')(self.inner)
        self.inner = MaxPooling2D(pool_size=(pool_size, pool_size),
                                  name='max2')(self.inner)

        # self.res is defined elsewhere; presumably it reshapes the feature
        # maps into a sequence with `last_dim` features per step - confirm.
        self.inner = Lambda(self.res, arguments={"last_dim": (img_h // (pool_size ** 2)) * conv_filters \
                                                      , "width": self.width // 4})(self.inner)

        # cuts down input size going into RNN:
        self.inp = Dense(time_dense_size, activation=act,
                         name='dense1')(self.inner)
        self.batch_norm = keras.layers.normalization.BatchNormalization()(
            self.inp)
        # First recurrent layer: forward and backward outputs are summed.
        self.gru_1 = Bidirectional(GRU(rnn_size,
                                       return_sequences=True,
                                       kernel_initializer='he_normal',
                                       name='gru1'),
                                   merge_mode="sum")(self.batch_norm)
        # Second recurrent layer: forward and backward outputs are concatenated.
        self.gru_2 = Bidirectional(GRU(rnn_size,
                                       return_sequences=True,
                                       kernel_initializer='he_normal',
                                       name='gru2'),
                                   merge_mode="concat")(self.gru_1)
        # Linear activation: the model emits unnormalised scores; softmax is
        # only applied explicitly for decoding further below.
        self.y_pred = TimeDistributed(
            Dense(63,
                  kernel_initializer='he_normal',
                  name='dense2',
                  activation='linear'))(self.gru_2)
        self.model = Model(inputs=self.input_data, outputs=self.y_pred)
        self.model.summary()
        # Backend function returning the raw model output.
        self.out = K.function(
            [self.input_data, self.width,
             K.learning_phase()], [self.y_pred])
        # 1-D int32 placeholders for the labels and for the per-sequence lengths.
        self.y_true = K.placeholder(name='y_true', ndim=1, dtype='int32')
        self.input_length = K.placeholder(name='input_length',
                                          ndim=1,
                                          dtype='int32')
        self.label_length = K.placeholder(name='label_length',
                                          ndim=1,
                                          dtype='int32')
        # The first two axes of y_pred are swapped before it is passed to
        # warp-ctc; the returned costs are averaged.
        self.loss_out = K.mean(
            warpctc_tensorflow.ctc(tf.transpose(self.y_pred,
                                                perm=[1, 0, 2]), self.y_true,
                                   self.label_length, self.input_length))
        # self.optimizer = keras.optimizers.Adam(lr = learning_rate)
        self.optimizer = keras.optimizers.SGD(lr=learning_rate,
                                              decay=1e-6,
                                              momentum=0.9,
                                              nesterov=True,
                                              clipnorm=200)
        # NOTE(review): only the optimizer's updates are collected here; confirm
        # whether the BatchNormalization layer's own updates must be added too.
        self.update = self.optimizer.get_updates(self.model.trainable_weights,
                                                 [],
                                                 loss=self.loss_out)
        # Decode from softmax-normalised predictions and keep the first decoded
        # result (the third positional argument is presumably `greedy` - confirm
        # against the installed Keras version).
        self.network_output = K.ctc_decode(
            Activation('softmax')(self.y_pred), self.input_length, True)[0][0]
        # One call to train_step returns [loss, y_pred] and applies self.update.
        self.train_step = K.function([self.input_data, self.width, self.y_true, self.input_length, self.label_length, K.learning_phase()], \
             [self.loss_out, self.y_pred], updates = self.update)
        # Index of the highest-scoring unit along the last axis of y_pred.
        self.test = K.argmax(self.y_pred, axis=2)
        self.predict_step = K.function([
            self.input_data, self.width, self.input_length,
            K.learning_phase()
        ], [self.network_output])
    def __init__(self):
        """Build the CNN + bidirectional LSTM graph trained with warp-ctc.

        Everything is created inside a dedicated `tf.Graph` stored in
        `self.graph`: the placeholders, two convolution/pooling stages, a
        bidirectional LSTM with dropout, the affine projection onto
        `num_classes` logits, the warp-ctc loss with L2 regularisation, the
        Adam training op, the greedy CTC decoder and the merged summaries.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            # e.g: log filter bank or MFCC features
            # 4-D input whose last two axes are fixed to num_features and 3;
            # the first two axes (presumably batch and time steps - confirm)
            # can vary along each step.
            self.inputs = tf.placeholder(tf.float32,
                                         [None, None, num_features, 3])

            # warp-ctc takes the labels as one flat int32 vector, so a plain
            # placeholder replaces the sparse placeholder that tf.nn.ctc_loss
            # would need.
            # self.labels = tf.sparse_placeholder(tf.int32)
            self.labels = tf.placeholder(tf.int32, [None])

            # 1-D int32 vectors (presumably one entry per batch element).
            self.seq_len = tf.placeholder(tf.int32, [None])
            self.label_len = tf.placeholder(tf.int32, [None])

            # Keep probabilities fed to the LSTM dropout wrappers.
            self.output_keep_prob = tf.placeholder("float")
            self.input_keep_prob = tf.placeholder("float")

            # CNN model
            W_conv1 = weight_variable([3, 3, 3, 64])
            b_conv1 = bias_variable([64])
            h_conv1 = tf.nn.relu(conv2d(self.inputs, W_conv1) + b_conv1)

            h_pool1 = max_pool_2x2(h_conv1)

            W_conv2 = weight_variable([3, 3, 64, 128])
            b_conv2 = bias_variable([128])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

            h_pool2 = max_pool_2x2(h_conv2)

            # h_pool2 shape e.g. [64, 140, 7, 128]
            shape = tf.shape(h_pool2)
            # Only batch_s is used below.
            batch_s, max_timesteps, features_num = shape[0], shape[1], shape[2]

            # reshape to [batch_size, max_timesteps, features]
            # (num_features * 32 presumably equals num_features / 4 * 128 after
            # the two poolings - confirm against max_pool_2x2)
            h_pool2 = tf.reshape(h_pool2, [batch_s, -1, num_features * 32])

            # Define bi-lstm cells with tensorflow
            # Forward direction cell
            lstm_fw_cell = tf.contrib.rnn.LSTMCell(FLAGS.num_hidden,
                                                   forget_bias=1.0)
            # add dropout
            lstm_fw_cell = tf.contrib.rnn.DropoutWrapper(
                cell=lstm_fw_cell,
                input_keep_prob=self.input_keep_prob,
                output_keep_prob=self.output_keep_prob)

            # Backward direction cell
            lstm_bw_cell = tf.contrib.rnn.LSTMCell(FLAGS.num_hidden,
                                                   forget_bias=1.0)
            # add dropout
            lstm_bw_cell = tf.contrib.rnn.DropoutWrapper(
                cell=lstm_bw_cell,
                input_keep_prob=self.input_keep_prob,
                output_keep_prob=self.output_keep_prob)

            outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,
                                                         lstm_bw_cell,
                                                         h_pool2,
                                                         self.seq_len,
                                                         dtype=tf.float32)
            # combine backward and forward lstm cell outputs
            outputs = tf.concat(outputs, 2)
            # Reshaping to apply the same weights over the timesteps
            outputs = tf.reshape(outputs, [-1, FLAGS.num_hidden * 2])

            # full connection layer
            W = tf.Variable(tf.truncated_normal(
                [FLAGS.num_hidden * 2, num_classes],
                stddev=0.1,
                dtype=tf.float32),
                            name='W')

            # Alternative architecture, kept for reference:
            ## 2 layer LSTM model
            ## Stacking rnn cells
            # stack = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.LSTMCell(FLAGS.num_hidden,state_is_tuple=True) for _ in range(FLAGS.num_layers)] , state_is_tuple=True)
            # outputs, _ = tf.nn.dynamic_rnn(stack, h_pool2, self.seq_len, dtype=tf.float32)

            ## Reshaping to apply the same weights over the timesteps
            # outputs = tf.reshape(outputs, [-1, FLAGS.num_hidden])

            ## full connection layer
            # W = tf.Variable(tf.truncated_normal([FLAGS.num_hidden,num_classes], stddev=0.1, dtype=tf.float32), name='W')

            # Zero initialization
            b = tf.Variable(
                tf.constant(0.,
                            dtype=tf.float32,
                            shape=[num_classes],
                            name='b'))

            # Doing the affine projection
            logits = tf.matmul(outputs, W) + b

            # Reshaping back to the original shape
            logits = tf.reshape(logits, [batch_s, -1, num_classes])

            # Time major
            logits = tf.transpose(logits, (1, 0, 2))

            self.global_step = tf.Variable(0, trainable=False)

            # warp-ctc replaces the built-in loss:
            # self.loss = tf.nn.ctc_loss(labels=self.labels,inputs=logits, sequence_length=self.seq_len)
            self.loss = warpctc_tensorflow.ctc(activations=logits,
                                               flat_labels=self.labels,
                                               label_lengths=self.label_len,
                                               input_lengths=self.seq_len)
            # L2 penalty over the two convolution kernels and the output weights.
            self.regularizer = tf.nn.l2_loss(W_conv1) + tf.nn.l2_loss(
                W_conv2) + tf.nn.l2_loss(W)

            # Mean CTC loss plus the penalty weighted by 0.01.
            self.cost = tf.reduce_mean(self.loss) + 0.01 * self.regularizer

            # learning_rate=tf.train.exponential_decay(FLAGS.initial_learning_rate,
            #        self.global_step,
            #        FLAGS.decay_steps,
            #        FLAGS.decay_rate,staircase=True)

            # tf.summary.scalar('lr',learning_rate)

            #optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
            #        momentum=FLAGS.momentum,use_nesterov=True).minimize(cost,global_step=global_step)
            # Despite its name, self.optimizer holds the op returned by minimize().
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=FLAGS.initial_learning_rate,
                beta1=FLAGS.beta1,
                beta2=FLAGS.beta2).minimize(self.cost,
                                            global_step=self.global_step)

            # Option 2: tf.contrib.ctc.ctc_beam_search_decoder
            # (it's slower but you'll get better results)
            self.decoded, self.log_prob = tf.nn.ctc_greedy_decoder(
                logits, self.seq_len, merge_repeated=True)
            # self.decoded, self.log_prob = tf.nn.ctc_beam_search_decoder(logits, self.seq_len,merge_repeated=True)

            # Dense version of the first decoded result, filled with -1.
            self.dense_decoded = tf.sparse_tensor_to_dense(self.decoded[0],
                                                           default_value=-1)
            # Inaccuracy: label error rate
            #self.lerr = tf.reduce_mean(tf.edit_distance(tf.cast(self.decoded[0], tf.int32), self.labels))

            tf.summary.scalar('cost', self.cost)
            # tf.summary.scalar('lerr',self.lerr)
            self.merged_summay = tf.summary.merge_all()
    def build_model(self):
        """Build the CNN -> bidirectional cuDNN LSTM -> CTC training graph.

        Inputs come from ``self.data_loader.get_input()``. The tensors that
        callers need are stored on ``self`` (``x``, ``y``, ``length``,
        ``lab_length``, ``is_training``, ``logits``, ``loss``, ``cost``,
        ``prediction``, ``cer``, ``train_step``) and the input and training
        tensors are also registered in the 'inputs' and 'train' collections.
        """
        # Helper Variables
        self.global_step_tensor = tf.Variable(0,
                                              trainable=False,
                                              name='global_step')
        self.global_step_inc = self.global_step_tensor.assign(
            self.global_step_tensor + 1)
        self.global_epoch_tensor = tf.Variable(0,
                                               trainable=False,
                                               name='global_epoch')
        self.global_epoch_inc = self.global_epoch_tensor.assign(
            self.global_epoch_tensor + 1)

        # Inputs to the network
        with tf.variable_scope('inputs'):
            (self.x, y, self.length,
             self.lab_length) = self.data_loader.get_input()
            # Dense labels use -1 as padding / end-of-sequence marker.
            self.y = tf.contrib.layers.dense_to_sparse(y, eos_token=-1)
            # Append a single channel axis so the 2-D convolutions apply.
            self.x = tf.expand_dims(self.x, 3)
            # Center Images: shift along axis 2 by half of the gap between
            # the axis-2 size and self.length.
            # presumably self.length is the unpadded image width -- TODO confirm
            x_shift = (tf.shape(self.x)[2] - self.length) / tf.constant(2)
            y_shift = tf.zeros_like(x_shift)
            translation_vector = tf.cast(tf.stack([x_shift, y_shift], axis=1),
                                         tf.float32)
            self.x = tf.contrib.image.translate(self.x, translation_vector)
            # Length after the network's down-sampling, rounded up.
            self.length = tf.cast(
                tf.math.ceil(
                    tf.math.divide(self.length,
                                   tf.constant(self.reduce_factor))), tf.int32)
            batch_size = tf.shape(self.x)[0]
            self.is_training = tf.placeholder(tf.bool, name='Training_flag')
        tf.add_to_collection('inputs', self.x)
        tf.add_to_collection('inputs', self.length)
        tf.add_to_collection('inputs', self.lab_length)
        tf.add_to_collection('inputs', y)
        tf.add_to_collection('inputs', self.is_training)

        # Define CNN variables
        initializer = tf.contrib.layers.xavier_initializer_conv2d()

        out_W = tf.Variable(tf.truncated_normal(
            [2 * self.rnn_num_hidden, self.data_loader.num_classes],
            stddev=0.1),
                            name='out_W')
        out_b = tf.Variable(tf.constant(0.,
                                        shape=[self.data_loader.num_classes]),
                            name='out_b')

        def conv_block(inputs, index, in_channels, pool):
            """Dropout -> conv -> batch norm -> leaky ReLU (-> 2x2 max pool)."""
            # The dropout mask has one value per (sample, channel) and is
            # broadcast over both spatial axes.
            noise_shape = tf.concat([
                tf.reshape(batch_size, [-1]),
                tf.constant(value=[1, 1, in_channels])
            ], 0)
            out = tf.layers.dropout(inputs,
                                    self.conv_dropouts[index],
                                    noise_shape=noise_shape,
                                    training=self.is_training)
            out = tf.layers.conv2d(out,
                                   self.conv_depths[index],
                                   self.conv_patch_sizes[index],
                                   padding='same',
                                   activation=None,
                                   kernel_initializer=initializer)
            # `training` must be passed explicitly: the default (False) keeps
            # batch norm in inference mode, so the moving statistics are never
            # updated and no UPDATE_OPS are created for the train step below.
            out = tf.layers.batch_normalization(out,
                                                training=self.is_training)
            out = tf.nn.leaky_relu(out)
            if pool:
                out = tf.layers.max_pooling2d(out, 2, 2, padding='same')
            return out

        # CNNs: five blocks, of which only the first three are max-pooled.
        num_conv_blocks = 5
        num_pooled_blocks = 3
        features = self.x
        in_channels = 1  # the channel axis added by expand_dims above
        for index in range(num_conv_blocks):
            with tf.name_scope('CNN_Block_%d' % (index + 1)):
                features = conv_block(features,
                                      index,
                                      in_channels,
                                      pool=index < num_pooled_blocks)
            in_channels = self.conv_depths[index]

        # Move axis 2 to the front and fold axis 1 and the channels together,
        # giving the time-major [steps, batch, features] layout fed to the RNN.
        output = tf.transpose(features, [2, 0, 1, 3])
        output = tf.reshape(output, [
            -1, batch_size,
            (self.config.im_height // self.reduce_factor) * self.conv_depths[4]
        ])
        # From here on every sample is given the full step count of the CNN
        # output; the per-sample length computed above only survives in the
        # 'inputs' collection.
        self.length = tf.tile(tf.expand_dims(tf.shape(output)[0], axis=0),
                              [batch_size])

        # RNN
        with tf.variable_scope('MultiRNN', reuse=tf.AUTO_REUSE):
            for _ in range(self.rnn_num_layers):
                output = tf.layers.dropout(output,
                                           self.rnn_dropout,
                                           training=self.is_training)
                lstm = tf.contrib.cudnn_rnn.CudnnLSTM(1, self.rnn_num_hidden,
                                                      'linear_input',
                                                      'bidirectional')
                output, _ = lstm(output)

        # Fully Connected
        with tf.name_scope('Dense'):
            output = tf.concat(output, 2)
            # Linear dropout
            output = tf.layers.dropout(output,
                                       self.linear_dropout,
                                       training=self.is_training)
            # Reshaping to apply the same weights over the timesteps
            output = tf.reshape(output, [-1, 2 * self.rnn_num_hidden])
            # Doing the affine projection
            logits = tf.matmul(output, out_W) + out_b

        # Reshaping back to the original shape
        self.logits = tf.reshape(
            logits, [-1, batch_size, self.data_loader.num_classes])

        with tf.variable_scope('loss-acc'):
            # The last class index is passed as the CTC blank label.
            self.loss = warpctc_tensorflow.ctc(
                self.logits, self.y.values, self.lab_length, self.length,
                self.data_loader.num_classes - 1)
            self.cost = tf.reduce_mean(self.loss)
            self.prediction = tf.nn.ctc_beam_search_decoder(
                self.logits, sequence_length=self.length, merge_repeated=False)
            self.cer = self.calc_cer(self.prediction[0][0], self.y)

        with tf.variable_scope('train_step'):
            # Run the batch-norm moving-average updates with every train step.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                # Optimise the scalar mean cost. Handing the per-example
                # vector to minimize() makes TF differentiate its sum, so the
                # gradient scale would depend on the batch size.
                self.train_step = tf.train.RMSPropOptimizer(
                    learning_rate=self.config.learning_rate,
                    decay=self.config.learning_rate_decay).minimize(
                        self.cost, global_step=self.global_step_tensor)

        tf.add_to_collection('train', self.train_step)
        tf.add_to_collection('train', self.cost)
        tf.add_to_collection('train', self.cer)
    def __init__(self):
        """Build the stacked-LSTM CTC graph inside a private ``tf.Graph``.

        Creates the feed placeholders (``inputs``, ``labels``, ``seq_len``,
        ``label_len``), the recurrent network and output projection, the
        warp-ctc loss with its mean ``cost``, an Adam train op with a
        staircase-decayed learning rate, a beam-search decoder and the merged
        summary op. Sizes and hyper-parameters are read from the module-level
        ``FLAGS``, ``num_features`` and ``num_classes``.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            # e.g: log filter bank or MFCC features
            # Has size [batch_size, max_stepsize, num_features], but the
            # batch_size and max_stepsize can vary along each step
            self.inputs = tf.placeholder(tf.float32,
                                         [None, None, num_features])

            # warp-ctc takes the labels of the whole batch as one flat 1-D
            # vector instead of a SparseTensor.
            self.labels = tf.placeholder(tf.int32, [None])

            # 1d array of size [batch_size]
            self.seq_len = tf.placeholder(tf.int32, [None])
            self.label_len = tf.placeholder(tf.int32, [None])

            # Stack FLAGS.num_layers LSTM cells of FLAGS.num_hidden units.
            stack = tf.contrib.rnn.MultiRNNCell(
                [
                    tf.contrib.rnn.LSTMCell(FLAGS.num_hidden,
                                            state_is_tuple=True)
                    for _ in range(FLAGS.num_layers)
                ],
                state_is_tuple=True)

            # The second output is the last state and we will not use it
            outputs, _ = tf.nn.dynamic_rnn(stack,
                                           self.inputs,
                                           self.seq_len,
                                           dtype=tf.float32)

            batch_s = tf.shape(self.inputs)[0]

            # Reshaping to apply the same weights over the timesteps
            outputs = tf.reshape(outputs, [-1, FLAGS.num_hidden])

            # Truncated normal with mean 0 and stdev=0.1
            W = tf.Variable(tf.truncated_normal(
                [FLAGS.num_hidden, num_classes], stddev=0.1, dtype=tf.float32),
                            name='W')
            # Zero initialization.
            # NOTE(review): name='b' is given to the constant initial value,
            # not to the variable itself. Left as is so that variable names
            # in existing checkpoints keep matching.
            b = tf.Variable(
                tf.constant(0.,
                            dtype=tf.float32,
                            shape=[num_classes],
                            name='b'))

            # Doing the affine projection
            logits = tf.matmul(outputs, W) + b

            # Reshaping back to the original shape
            logits = tf.reshape(logits, [batch_s, -1, num_classes])

            # Time major
            logits = tf.transpose(logits, (1, 0, 2))

            self.global_step = tf.Variable(0, trainable=False)

            # NOTE(review): no blank_label is passed, so the binding's default
            # applies, whereas the TF decoder used below is documented to
            # treat the last class index as blank -- confirm that the label
            # encoding is consistent with both.
            self.loss = warpctc_tensorflow.ctc(activations=logits,
                                               flat_labels=self.labels,
                                               label_lengths=self.label_len,
                                               input_lengths=self.seq_len)

            self.cost = tf.reduce_mean(self.loss)

            self.learning_rate = tf.train.exponential_decay(
                FLAGS.initial_learning_rate,
                self.global_step,
                FLAGS.decay_steps,
                FLAGS.decay_rate,
                staircase=True)

            # Optimise the scalar mean cost (the value reported in the
            # summary). Passing the per-example vector self.loss would make
            # TF differentiate its sum, tying the gradient scale to the batch
            # size.
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate,
                beta1=FLAGS.beta1,
                beta2=FLAGS.beta2).minimize(self.cost,
                                            global_step=self.global_step)

            # Beam search decoding; tf.nn.ctc_greedy_decoder is the faster
            # alternative.
            self.decoded, self.log_prob = tf.nn.ctc_beam_search_decoder(
                logits, self.seq_len, merge_repeated=True)

            tf.summary.scalar('cost', self.cost)
            self.merged_summay = tf.summary.merge_all()
def loss(logits, seq_lens, labels, label_lens):
    """Register the mean warp-ctc cost and return the summed 'losses'.

    The per-example CTC costs for the batch are averaged, the average is
    appended to the 'losses' graph collection, and the sum of everything
    currently in that collection is returned as the op named 'total_loss'.
    """
    per_example_ctc = warpctc_tensorflow.ctc(
        activations=logits,
        flat_labels=labels,
        label_lengths=label_lens,
        input_lengths=seq_lens,
    )
    tf.add_to_collection('losses', tf.reduce_mean(per_example_ctc))
    collected = tf.get_collection('losses')
    return tf.add_n(collected, name='total_loss')