def decoder_layer(input_ph,
                  num_layers: int,
                  num_units: List[int],
                  activation_list,
                  name: str = 'decoder',
                  use_batch_normalization: bool = True,
                  train_ph: bool = True,
                  use_tensorboard: bool = True,
                  keep_prob_list: List[float] = 0,
                  tensorboard_scope: str = None):

    return dense_multilayer(input_ph=input_ph, num_layers=num_layers,
                            num_units=num_units, name=name,
                            activation_list=activation_list,
                            use_batch_normalization=use_batch_normalization,
                            train_ph=train_ph, use_tensorboard=use_tensorboard,
                            keep_prob_list=keep_prob_list,
                            tensorboard_scope=tensorboard_scope)
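
A minimal, hypothetical call to this wrapper might look as follows. It assumes the project's dense_multilayer helper is importable; the placeholder shape, unit counts, activations and keep probabilities are illustrative values only, not taken from the original code.

import tensorflow as tf

# Hypothetical usage sketch (illustrative values, not from the original source).
decoder_in = tf.placeholder(tf.float32, shape=[None, None, 128], name='decoder_in')
decoder_out = decoder_layer(decoder_in,
                            num_layers=2,
                            num_units=[256, 256],
                            activation_list=[tf.nn.relu, tf.nn.relu],
                            name='decoder',
                            use_batch_normalization=True,
                            train_ph=True,
                            use_tensorboard=True,
                            keep_prob_list=[0.8, 0.8],
                            tensorboard_scope='decoder')
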
Example 2
def model_fn(features, labels, mode, config, params):

    feature = features['feature']
    feat_len = features['feat_len']
    sparse_target = labels

    global_step = tf.train.get_global_step()

    with tf.name_scope("seq_len"):
        input_features_length = feat_len

    with tf.name_scope("input_features"):
        input_features = feature

    with tf.name_scope("input_labels"):
        input_labels = sparse_target

    subsample_factor = params["num_reduce_by_half"]
    if subsample_factor is not None and subsample_factor > 0:
        for i in range(subsample_factor):
            input_features_length = tf.div(input_features_length, 2) + tf.cast(
                input_features_length % 2, dtype=tf.int32)
            input_features = input_features[:, ::2]
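            # Each halving keeps every other time step and updates the lengths as
            # ceil(len / 2): floor division by 2 plus the remainder, e.g. 9 -> 5 -> 3.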

    if params['noise_stddev'] is not None and params['noise_stddev'] != 0.0:
        input_features = tf.keras.layers.GaussianNoise(
            stddev=params['noise_stddev'])(
                inputs=input_features,
                training=mode == tf.estimator.ModeKeys.TRAIN)

    rnn_input = tf.identity(input_features)
    with tf.name_scope("dense_layer_1"):
        rnn_input = dense_multilayer(
            input_ph=rnn_input,
            num_layers=params['num_dense_layers_1'],
            num_units=params['num_units_1'],
            name='dense_layer_1',
            activation_list=params['dense_activations_1'],
            use_batch_normalization=params['batch_normalization_1'],
            batch_normalization_trainable=params[
                'batch_normalization_trainable_1'],
            train_ph=mode == tf.estimator.ModeKeys.TRAIN,
            use_tensorboard=True,
            keep_prob_list=params['keep_prob_1'],
            kernel_initializers=params['kernel_init_1'],
            bias_initializers=params['bias_init_1'],
            tensorboard_scope='dense_layer_1')

    with tf.name_scope("RNN_cell"):
        if params['is_bidirectional']:
            rnn_outputs = bidirectional_rnn(
                input_ph=rnn_input,
                seq_len_ph=input_features_length,
                num_layers=len(params['num_cell_units']),
                num_cell_units=params['num_cell_units'],
                activation_list=params['cell_activation'],
                use_tensorboard=True,
                tensorboard_scope='RNN',
                train_ph=mode == tf.estimator.ModeKeys.TRAIN,
                keep_prob_list=params['keep_prob_rnn'],
                use_batch_normalization=params["rnn_batch_normalization"] ==
                True)

        else:
            rnn_outputs = unidirectional_rnn(
                input_ph=rnn_input,
                seq_len_ph=input_features_length,
                num_layers=len(params['num_cell_units']),
                num_cell_units=params['num_cell_units'],
                activation_list=params['cell_activation'],
                use_tensorboard=True,
                tensorboard_scope='RNN',
                train_ph=mode == tf.estimator.ModeKeys.TRAIN,
                keep_prob_list=params['keep_prob_rnn'],
                use_batch_normalization=params["rnn_batch_normalization"] ==
                True)

    with tf.name_scope("dense_layer_2"):
        rnn_outputs = dense_multilayer(
            input_ph=rnn_outputs,
            num_layers=params['num_dense_layers_2'],
            num_units=params['num_units_2'],
            name='dense_layer_2',
            activation_list=params['dense_activations_2'],
            use_batch_normalization=params['batch_normalization_2'],
            batch_normalization_trainable=params[
                'batch_normalization_trainable_2'],
            train_ph=mode == tf.estimator.ModeKeys.TRAIN,
            use_tensorboard=True,
            keep_prob_list=params['keep_prob_2'],
            kernel_initializers=params['kernel_init_2'],
            bias_initializers=params['bias_init_2'],
            tensorboard_scope='dense_layer_2',
            # batch_normalization_training=True
        )

    with tf.name_scope("dense_output"):
        dense_output_no_activation = dense_layer(
            input_ph=rnn_outputs,
            num_units=params['num_classes'],
            name='dense_output_no_activation',
            activation=None,
            use_batch_normalization=False,
            train_ph=False,
            use_tensorboard=True,
            keep_prob=1,
            tensorboard_scope='dense_output')

        dense_output = tf.nn.softmax(dense_output_no_activation,
                                     name='dense_output')
        tf.summary.histogram('dense_output', dense_output)

    with tf.name_scope("decoder"):
        output_time_major = tf.transpose(dense_output, (1, 0, 2))
        if params['beam_width'] == 0:
            decoded, log_prob = tf.nn.ctc_greedy_decoder(output_time_major,
                                                         input_features_length,
                                                         merge_repeated=True)
        else:
            decoded, log_prob = tf.nn.ctc_beam_search_decoder(
                output_time_major,
                input_features_length,
                beam_width=params['beam_width'],
                top_paths=1,
                merge_repeated=False)
        dense_decoded = tf.sparse.to_dense(sp_input=decoded[0],
                                           validate_indices=True)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=dense_decoded)

    with tf.name_scope("loss"):
        rnn_loss = 0
        for var in tf.trainable_variables():
            if var.name.startswith('RNN_cell') and 'kernel' in var.name:
                rnn_loss += tf.nn.l2_loss(var)

        dense_loss = 0
        for var in tf.trainable_variables():
            if (var.name.startswith('dense_layer') or
                    var.name.startswith('input_dense_layer')) \
                    and 'kernel' in var.name:
                dense_loss += tf.nn.l2_loss(var)

        loss = tf.nn.ctc_loss(input_labels,
                              dense_output_no_activation,
                              input_features_length,
                              time_major=False)
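        # tf.nn.ctc_loss expects sparse integer labels and pre-softmax logits, which
        # is why dense_output_no_activation (not dense_output) is passed here.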
        logits_loss = tf.reduce_mean(tf.reduce_sum(loss))
        loss = logits_loss \
               + params['rnn_regularizer'] * rnn_loss \
               + params['dense_regularizer'] * dense_loss
        tf.summary.scalar('loss', loss)

    with tf.name_scope("label_error_rate"):
        # Inaccuracy: label error rate
        ler = tf.reduce_mean(
            tf.edit_distance(hypothesis=tf.cast(decoded[0], tf.int32),
                             truth=input_labels,
                             normalize=True))
        metrics = {
            'LER': tf.metrics.mean(ler),
        }
        tf.summary.scalar('label_error_rate', tf.reduce_mean(ler))

    logging_hook = tf.train.LoggingTensorHook(
        tensors={
            "loss": loss,
            "ler": ler,
        },
        every_n_iter=1)

    if mode == tf.estimator.ModeKeys.TRAIN:
        if params['use_learning_rate_decay']:
            learning_rate = tf.train.exponential_decay(
                params['learning_rate'],
                global_step,
                decay_steps=params['learning_rate_decay_steps'],
                decay_rate=params['learning_rate_decay'],
                staircase=True)
        else:
            learning_rate = params['learning_rate']

        if params['optimizer'] == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif params['optimizer'] == 'momentum' and params[
                'momentum'] is not None:
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=params['momentum'])
        elif params['optimizer'] == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
        else:
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

        loss = tf.tuple([loss],
                        control_inputs=tf.get_collection(
                            tf.GraphKeys.UPDATE_OPS))[0]
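        # Wrapping the loss in tf.tuple with UPDATE_OPS as control inputs forces the
        # batch-normalization moving-average update ops to run on every training step.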
        if params['clip_gradient'] != 0:
            grads = tf.gradients(loss, tf.trainable_variables())
            grads, _ = tf.clip_by_global_norm(grads, params['clip_gradient'])
            grads_and_vars = list(zip(grads, tf.trainable_variables()))
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
        else:
            train_op = optimizer.minimize(loss, global_step=global_step)

        train_logging_hook = tf.train.LoggingTensorHook(
            tensors={
                'loss': loss,
                'ler': tf.reduce_mean(ler),
                'learning_rate': tf.reduce_mean(learning_rate),
                # 'feal_len': feat_len,
                # 'feal_len2': input_features_length,
                # 'feal_len3': tf.shape(input_features),
            },
            every_n_secs=1)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=[train_logging_hook],
                                          eval_metric_ops=metrics)

    if mode == tf.estimator.ModeKeys.EVAL:

        def _create_alignment_images_summary(outputs):
            images = outputs
            images = tf.expand_dims(images, -1)
            # Scale to range [0, 255]
            images -= 1
            images = -images
            images *= 255
            summary = tf.summary.image("alignment_images", images)
            return summary

        with tf.name_scope('alignment'):
            alignment_summary = _create_alignment_images_summary(dense_output)

        eval_summary_hook = tf.train.SummarySaverHook(
            save_steps=10,
            output_dir=os.path.join(config.model_dir, 'eval'),
            summary_op=alignment_summary)

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            evaluation_hooks=[logging_hook, eval_summary_hook],
            eval_metric_ops=metrics)
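
For context, a rough sketch of how this model_fn could be plugged into a tf.estimator.Estimator is shown below. It is hypothetical, not part of the original source: the params dict, train_input_fn and eval_input_fn are assumed to be provided by the surrounding project and are only named here as placeholders.

import tensorflow as tf

# Hypothetical wiring sketch. `params` must contain every key read by model_fn above,
# and the input functions must yield batches of
# ({'feature': float32 [batch, time, feat], 'feat_len': int32 [batch]}, sparse_labels).
run_config = tf.estimator.RunConfig(model_dir='./ctc_model', save_summary_steps=10)
estimator = tf.estimator.Estimator(model_fn=model_fn,
                                   config=run_config,
                                   params=params)
estimator.train(input_fn=train_input_fn, steps=1000)
print(estimator.evaluate(input_fn=eval_input_fn))
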
Example 3
    def create_graph(self):

        with self.graph.as_default():
            self.tf_is_traing_pl = tf.placeholder_with_default(
                True, shape=(), name='is_training')

            with tf.name_scope("seq_len"):
                self.seq_len = tf.placeholder(tf.int32,
                                              shape=[None],
                                              name="sequence_length")

            with tf.name_scope("input_features"):
                self.input_feature = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, None, self.network_data.num_features],
                    name="input")
                tf.summary.image('feature', [tf.transpose(self.input_feature)])
            with tf.name_scope("input_labels"):
                self.input_label = tf.sparse_placeholder(dtype=tf.int32,
                                                         shape=[None, None],
                                                         name="input_label")

            self.dense_layer_1 = tf.identity(self.input_feature)
            with tf.name_scope("dense_layer_1"):
                self.dense_layer_1 = dense_multilayer(
                    input_ph=self.dense_layer_1,
                    num_layers=self.network_data.num_dense_layers_1,
                    num_units=self.network_data.num_dense_units_1,
                    name='dense_layer_1',
                    activation_list=self.network_data.dense_activations_1,
                    use_batch_normalization=self.network_data.
                    batch_normalization_1,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_1,
                    kernel_initializers=self.network_data.kernel_init_1,
                    bias_initializers=self.network_data.bias_init_1,
                    tensorboard_scope='dense_layer_1')

            with tf.name_scope("RNN_1"):
                if self.network_data.is_bidirectional_1:
                    self.rnn_outputs_1 = bidirectional_rnn(
                        input_ph=self.dense_layer_1,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_fw_cell_units_1),
                        num_fw_cell_units=self.network_data.
                        num_fw_cell_units_1,
                        num_bw_cell_units=self.network_data.
                        num_bw_cell_units_1,
                        name="RNN_1",
                        activation_fw_list=self.network_data.
                        cell_fw_activation_1,
                        activation_bw_list=self.network_data.
                        cell_bw_activation_1,
                        use_tensorboard=True,
                        tensorboard_scope='RNN_1',
                        output_size=self.network_data.rnn_output_sizes_1)

                else:
                    self.rnn_outputs_1 = unidirectional_rnn(
                        input_ph=self.dense_layer_1,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_cell_units_1),
                        num_cell_units=self.network_data.num_cell_units_1,
                        name="RNN_1",
                        activation_list=self.network_data.cell_activation_1,
                        use_tensorboard=True,
                        tensorboard_scope='RNN_1',
                        output_size=self.network_data.rnn_output_sizes_1)

            with tf.name_scope("dense_layer_2"):
                self.dense_layer_2 = dense_multilayer(
                    input_ph=self.rnn_outputs_1,
                    num_layers=self.network_data.num_dense_layers_2,
                    num_units=self.network_data.num_dense_units_2,
                    name='dense_layer_2',
                    activation_list=self.network_data.dense_activations_2,
                    use_batch_normalization=self.network_data.
                    batch_normalization_2,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_2,
                    kernel_initializers=self.network_data.kernel_init_2,
                    bias_initializers=self.network_data.bias_init_2,
                    tensorboard_scope='dense_layer_2')

            with tf.name_scope("dense_output_1"):
                self.dense_output_no_activation_1 = dense_layer(
                    input_ph=self.dense_layer_2,
                    num_units=self.network_data.num_classes,
                    name='dense_output_no_activation_1',
                    activation=None,
                    use_batch_normalization=False,
                    train_ph=False,
                    use_tensorboard=True,
                    keep_prob=1,
                    tensorboard_scope='dense_output_1')

                self.dense_output_1 = tf.nn.softmax(
                    self.dense_output_no_activation_1, name='dense_output_1')
                tf.summary.histogram('dense_output_1', self.dense_output_1)

            with tf.name_scope("decoder_1"):
                self.output_time_major_1 = tf.transpose(
                    self.dense_output_1, (1, 0, 2))
                self.decoded_1, log_prob = self.network_data.decoder_function(
                    self.output_time_major_1, self.seq_len)
                self.dense_decoded_1 = tf.sparse_to_dense(
                    self.decoded_1[0].indices, self.decoded_1[0].dense_shape,
                    self.decoded_1[0].values)

            with tf.name_scope("dense_layer_3"):
                self.dense_layer_3 = dense_multilayer(
                    input_ph=self.dense_output_1,
                    num_layers=self.network_data.num_dense_layers_3,
                    num_units=self.network_data.num_dense_units_3,
                    name='dense_layer_3',
                    activation_list=self.network_data.dense_activations_3,
                    use_batch_normalization=self.network_data.
                    batch_normalization_3,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_3,
                    kernel_initializers=self.network_data.kernel_init_3,
                    bias_initializers=self.network_data.bias_init_3,
                    tensorboard_scope='dense_layer_3')

            with tf.name_scope("RNN_2"):
                if self.network_data.is_bidirectional_2:
                    self.rnn_outputs_2 = bidirectional_rnn(
                        input_ph=self.dense_layer_3,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_fw_cell_units_2),
                        num_fw_cell_units=self.network_data.
                        num_fw_cell_units_2,
                        num_bw_cell_units=self.network_data.
                        num_bw_cell_units_2,
                        name="RNN_2",
                        activation_fw_list=self.network_data.
                        cell_fw_activation_2,
                        activation_bw_list=self.network_data.
                        cell_bw_activation_2,
                        use_tensorboard=True,
                        tensorboard_scope='RNN_2',
                        output_size=self.network_data.rnn_output_sizes_2)

                else:
                    self.rnn_outputs_2 = unidirectional_rnn(
                        input_ph=self.dense_layer_3,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_cell_units_2),
                        num_cell_units=self.network_data.num_cell_units_2,
                        name="RNN_2",
                        activation_list=self.network_data.cell_activation_2,
                        use_tensorboard=True,
                        tensorboard_scope='RNN_2',
                        output_size=self.network_data.rnn_output_sizes_2)

            with tf.name_scope("dense_layer_4"):
                self.dense_layer_4 = dense_multilayer(
                    input_ph=self.rnn_outputs_2,
                    num_layers=self.network_data.num_dense_layers_4,
                    num_units=self.network_data.num_dense_units_4,
                    name='dense_layer_4',
                    activation_list=self.network_data.dense_activations_4,
                    use_batch_normalization=self.network_data.
                    batch_normalization_4,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_4,
                    kernel_initializers=self.network_data.kernel_init_4,
                    bias_initializers=self.network_data.bias_init_4,
                    tensorboard_scope='dense_layer_4')

            with tf.name_scope("dense_output_2"):
                self.dense_output_no_activation_2 = dense_layer(
                    input_ph=self.dense_layer_4,
                    num_units=self.network_data.num_classes,
                    name='dense_output_no_activation_2',
                    activation=None,
                    use_batch_normalization=False,
                    train_ph=False,
                    use_tensorboard=True,
                    keep_prob=1,
                    tensorboard_scope='dense_output_no_activation_2')

                self.dense_output_2 = tf.nn.softmax(
                    self.dense_output_no_activation_2, name='dense_output_2')
                tf.summary.histogram('dense_output_2', self.dense_output_2)

            with tf.name_scope("decoder_2"):
                self.output_time_major_2 = tf.transpose(
                    self.dense_output_2, (1, 0, 2))
                self.decoded_2, log_prob = self.network_data.decoder_function(
                    self.output_time_major_2, self.seq_len)
                self.dense_decoded_2 = tf.sparse_to_dense(
                    self.decoded_2[0].indices, self.decoded_2[0].dense_shape,
                    self.decoded_2[0].values)

            with tf.name_scope("loss"):
                rnn_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith('RNN_') and 'kernel' in var.name:
                        rnn_loss += tf.nn.l2_loss(var)

                dense_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith('dense_layer') and \
                            'kernel' in var.name:
                        dense_loss += tf.nn.l2_loss(var)

                loss_1 = tf.nn.ctc_loss(self.input_label,
                                        self.dense_output_no_activation_1,
                                        self.seq_len,
                                        time_major=False)
                loss_2 = tf.nn.ctc_loss(self.input_label,
                                        self.dense_output_no_activation_2,
                                        self.seq_len,
                                        time_major=False)
                self.logits_loss = tf.reduce_mean(tf.reduce_sum(
                    loss_2)) + 0.3 * tf.reduce_mean(tf.reduce_sum(loss_1))
                self.loss = self.logits_loss \
                            + self.network_data.rnn_regularizer * rnn_loss \
                            + self.network_data.dense_regularizer * dense_loss
                tf.summary.scalar('loss', self.loss)

            # define the optimizer
            with tf.name_scope("training"):
                self.training_op = self.network_data.optimizer.minimize(
                    self.loss)

            with tf.name_scope("label_error_rate"):
                # Inaccuracy: label error rate
                self.ler = tf.reduce_mean(
                    tf.edit_distance(hypothesis=tf.cast(
                        self.decoded_2[0], tf.int32),
                                     truth=self.input_label,
                                     normalize=True))
                tf.summary.scalar('label_error_rate', tf.reduce_mean(self.ler))

            self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
            self.merged_summary = tf.summary.merge_all()
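
A hypothetical single-training-step run of the graph above could look like the following. It assumes net is an instance of this class with create_graph() already called and that num_features equals net.network_data.num_features; the feature batch and the two-symbol sparse target are dummy data.

import numpy as np
import tensorflow as tf

# Hypothetical usage sketch (dummy data, not from the original source).
with net.graph.as_default():
    init_op = tf.global_variables_initializer()

with tf.Session(graph=net.graph) as sess:
    sess.run(init_op)
    features = np.random.rand(1, 80, num_features).astype(np.float32)  # [batch, time, feat]
    labels = tf.SparseTensorValue(indices=[[0, 0], [0, 1]],
                                  values=np.array([3, 7], dtype=np.int32),
                                  dense_shape=[1, 2])
    _, batch_loss, batch_ler = sess.run(
        [net.training_op, net.loss, net.ler],
        feed_dict={net.input_feature: features,
                   net.seq_len: [80],
                   net.input_label: labels,
                   net.tf_is_traing_pl: True})
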
Example 4
    def create_graph(self):

        with self.graph.as_default():
            self.tf_is_traing_pl = tf.placeholder_with_default(
                True, shape=(), name='is_training')

            with tf.name_scope("seq_len"):
                self.seq_len = tf.placeholder(tf.int32,
                                              shape=[None],
                                              name="sequence_length")

            with tf.name_scope("input_features"):
                self.input_feature = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, None, self.network_data.num_features],
                    name="input")
                tf.summary.image('feature', [tf.transpose(self.input_feature)])
            with tf.name_scope("input_labels"):
                self.input_label = tf.sparse_placeholder(dtype=tf.int32,
                                                         shape=[None, None],
                                                         name="input_label")

            self.dense_layer_1 = tf.identity(self.input_feature)
            with tf.name_scope("dense_layer_1"):
                self.dense_layer_1 = dense_multilayer(
                    input_ph=self.dense_layer_1,
                    num_layers=self.network_data.num_dense_layers_1,
                    num_units=self.network_data.num_dense_units_1,
                    name='dense_layer_1',
                    activation_list=self.network_data.dense_activations_1,
                    use_batch_normalization=self.network_data.
                    batch_normalization_1,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_1,
                    kernel_initializers=self.network_data.kernel_init_1,
                    bias_initializers=self.network_data.bias_init_1,
                    tensorboard_scope='dense_layer_1')

            with tf.name_scope("RNN_cell"):
                if self.network_data.is_bidirectional:
                    self.rnn_outputs = bidirectional_rnn(
                        input_ph=self.dense_layer_1,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_fw_cell_units),
                        num_fw_cell_units=self.network_data.num_fw_cell_units,
                        num_bw_cell_units=self.network_data.num_bw_cell_units,
                        name="RNN_cell",
                        activation_fw_list=self.network_data.
                        cell_fw_activation,
                        activation_bw_list=self.network_data.
                        cell_bw_activation,
                        use_tensorboard=True,
                        tensorboard_scope='RNN',
                        output_size=self.network_data.rnn_output_sizes)

                else:
                    self.rnn_outputs = unidirectional_rnn(
                        input_ph=self.dense_layer_1,
                        seq_len_ph=self.seq_len,
                        num_layers=len(self.network_data.num_cell_units),
                        num_cell_units=self.network_data.num_cell_units,
                        name="RNN_cell",
                        activation_list=self.network_data.cell_activation,
                        use_tensorboard=True,
                        tensorboard_scope='RNN',
                        output_size=self.network_data.rnn_output_sizes)

            with tf.name_scope("dense_layer_2"):
                self.dense_layer_2 = dense_multilayer(
                    input_ph=self.rnn_outputs,
                    num_layers=self.network_data.num_dense_layers_2,
                    num_units=self.network_data.num_dense_units_2,
                    name='dense_layer_2',
                    activation_list=self.network_data.dense_activations_2,
                    use_batch_normalization=self.network_data.
                    batch_normalization_2,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_dropout_2,
                    kernel_initializers=self.network_data.kernel_init_2,
                    bias_initializers=self.network_data.bias_init_2,
                    tensorboard_scope='dense_layer_2')

            with tf.name_scope("dense_output"):
                self.dense_output_no_activation = dense_layer(
                    input_ph=self.rnn_outputs,
                    num_units=self.network_data.num_classes,
                    name='dense_output_no_activation',
                    activation=None,
                    use_batch_normalization=False,
                    train_ph=False,
                    use_tensorboard=True,
                    keep_prob=1,
                    tensorboard_scope='dense_output')

                self.dense_output = tf.nn.softmax(
                    self.dense_output_no_activation, name='dense_output')
                tf.summary.histogram('dense_output', self.dense_output)

            with tf.name_scope("output_classes"):
                self.output_classes = tf.argmax(self.dense_output, 2)

            with tf.name_scope("loss"):
                rnn_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith(
                            'RNN_cell') and 'kernel' in var.name:
                        rnn_loss += tf.nn.l2_loss(var)

                dense_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith(
                            'dense_layer') and 'kernel' in var.name:
                        dense_loss += tf.nn.l2_loss(var)

                loss = tf.nn.ctc_loss(self.input_label,
                                      self.dense_output_no_activation,
                                      self.seq_len,
                                      time_major=False)
                self.logits_loss = tf.reduce_mean(tf.reduce_sum(loss))
                self.loss = self.logits_loss \
                            + self.network_data.rnn_regularizer * rnn_loss \
                            + self.network_data.dense_regularizer * dense_loss
                tf.summary.scalar('loss', self.loss)

            # define the optimizer
            with tf.name_scope("training"):
                self.training_op = self.network_data.optimizer.minimize(
                    self.loss)

            with tf.name_scope("decoder"):
                self.output_time_major = tf.transpose(self.dense_output,
                                                      (1, 0, 2))

                self.word_beam_search_module = tf.load_op_library(
                    self.network_data.word_beam_search_path)
                # prepare information about language (dictionary, characters in dataset, characters forming words)
                chars = str().join(self.network_data.char_list)
                word_chars = open(self.network_data.word_char_list_path).read(
                ).splitlines()[0]
                corpus = open(self.network_data.corpus_path).read()

                # decode using the "Words" mode of word beam search
                self.decoded = self.word_beam_search_module.word_beam_search(
                    self.output_time_major, self.network_data.beam_width,
                    self.network_data.scoring_mode,
                    self.network_data.smoothing, corpus.encode('utf8'),
                    chars.encode('utf8'), word_chars.encode('utf8'))
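                # Assumption (not stated in the original source): the word beam search
                # custom op consumes time-major softmax outputs of shape
                # [max_time, batch, num_classes] and returns a blank-padded dense
                # [batch, max_time] tensor of label ids, which is why the
                # label_error_rate block below masks out num_classes - 1.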

            with tf.name_scope("label_error_rate"):
                # Not the best way to compute the LER, but I tried several and this
                # one worked best.
                # Inaccuracy: label error rate
                dense_label = tf.sparse_to_dense(self.input_label.indices,
                                                 self.input_label.dense_shape,
                                                 self.input_label.values)
                # (self.network_data.num_classes - 1) is the blank index
                decoded_mask = tf.not_equal(self.decoded,
                                            self.network_data.num_classes - 1)
                decoded_mask.set_shape([None, None])
                decoded_mask = tf.boolean_mask(self.decoded, decoded_mask)

                label_mask = tf.not_equal(dense_label,
                                          self.network_data.num_classes - 1)
                label_mask.set_shape([None, None])
                label_mask = tf.boolean_mask(dense_label, label_mask)

                self.edit_distance = tf.edit_distance(
                    hypothesis=tf.cast(
                        tf.contrib.layers.dense_to_sparse([decoded_mask]),
                        tf.int32),
                    truth=tf.cast(
                        tf.contrib.layers.dense_to_sparse([label_mask]),
                        tf.int32),
                    normalize=True)
                self.ler = tf.reduce_mean(self.edit_distance)
                tf.summary.scalar('label_error_rate', tf.reduce_mean(self.ler))

            self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
            self.merged_summary = tf.summary.merge_all()
Example 5
    def create_graph(self,
                     use_tfrecords=False,
                     features_tensor=None,
                     labels_tensor=None,
                     features_len_tensor=None,
                     labels_len_tensor=None):

        with self.graph.as_default():
            self.tf_is_traing_pl = tf.placeholder_with_default(
                True, shape=(), name='is_training')

            with tf.name_scope("input_features"):
                if use_tfrecords:
                    self.input_features = features_tensor
                else:
                    self.input_features = tf.placeholder(
                        dtype=tf.float32,
                        shape=[None, None, self.network_data.num_features],
                        name="input_features")
            with tf.name_scope("input_features_length"):
                if use_tfrecords:
                    self.input_features_length = features_len_tensor
                else:
                    self.input_features_length = tf.placeholder(
                        dtype=tf.int32,
                        shape=[None],
                        name='input_features_length')
            with tf.name_scope("input_labels"):
                if use_tfrecords:
                    self.input_labels = labels_tensor
                else:
                    self.input_labels = tf.placeholder(dtype=tf.int32,
                                                       shape=[None, None],
                                                       name='input_labels')
            with tf.name_scope("input_labels_length"):
                if use_tfrecords:
                    self.input_labels_length = labels_len_tensor
                else:
                    self.input_labels_length = tf.placeholder(
                        dtype=tf.int32,
                        shape=[None],
                        name='input_labels_length')

            self.max_label_length = tf.reduce_max(self.input_labels_length,
                                                  name='max_label_length')
            self.max_features_length = tf.reduce_max(
                self.input_features_length, name='max_features_length')
            self.batch_size = tf.shape(self.input_features)[0]
            self.global_step = tf.Variable(0,
                                           trainable=False,
                                           name='global_step')

            with tf.name_scope("embeddings"):
                self.embedding = tf.get_variable(
                    name='embedding',
                    shape=[
                        self.network_data.num_classes + 1,
                        self.network_data.num_embeddings
                    ],
                    dtype=tf.float32)

                self.label_embedding = tf.nn.embedding_lookup(
                    params=self.embedding,
                    ids=self.input_labels,
                    name='label_embedding')

            with tf.name_scope("dense_layer_1"):
                self.dense_layer_1_out = dense_multilayer(
                    input_ph=self.input_features,
                    num_layers=self.network_data.num_dense_layers_1,
                    num_units=self.network_data.num_units_1,
                    name='dense_layer_1',
                    activation_list=self.network_data.dense_activations_1,
                    use_batch_normalization=self.network_data.
                    batch_normalization_1,
                    train_ph=self.tf_is_traing_pl,
                    use_tensorboard=True,
                    keep_prob_list=self.network_data.keep_prob_1,
                    kernel_initializers=self.network_data.kernel_init_1,
                    bias_initializers=self.network_data.bias_init_1,
                    tensorboard_scope='dense_layer_1')

            with tf.name_scope("listener"):
                self.listener_output, self.listener_out_len, self.listener_state = bidirectional_pyramidal_rnn(
                    input_ph=self.dense_layer_1_out,
                    seq_len_ph=self.input_features_length,
                    num_layers=self.network_data.listener_num_layers,
                    num_units=self.network_data.listener_num_units,
                    name="listener",
                    activation_list=self.network_data.listener_activation_list,
                    use_tensorboard=True,
                    tensorboard_scope="listener",
                    keep_prob=self.network_data.listener_keep_prob_list,
                    train_ph=self.tf_is_traing_pl)

            with tf.variable_scope("attention"):
                cell, decoder_initial_state = attention_layer(
                    input=self.listener_output,
                    num_layers=self.network_data.attention_num_layers,
                    rnn_units_list=list(
                        map(lambda x: 2 * x,
                            self.network_data.listener_num_units)),
                    rnn_activations_list=self.network_data.
                    attention_activation_list,
                    attention_units=self.network_data.attention_units,
                    lengths=self.listener_out_len,
                    batch_size=self.batch_size,
                    input_state=self.listener_state,
                    keep_prob=self.network_data.attention_keep_prob_list,
                    train_ph=self.tf_is_traing_pl)

                self.logits, _, _ = attention_decoder(
                    input_cell=cell,
                    initial_state=decoder_initial_state,
                    embedding=self.embedding,
                    seq_embedding=self.label_embedding,
                    seq_embedding_len=self.input_labels_length,
                    output_projection=Dense(self.network_data.num_classes),
                    max_iterations=self.max_label_length,
                    sampling_prob=0.5,
                    time_major=False,
                    name="attention")

            with tf.name_scope("tile_batch"):
                if self.network_data.beam_width > 0:
                    tiled_listener_output = tf.contrib.seq2seq.tile_batch(
                        self.listener_output,
                        multiplier=self.network_data.beam_width)
                    tiled_listener_state = tf.contrib.seq2seq.tile_batch(
                        self.listener_state,
                        multiplier=self.network_data.beam_width)
                    tiled_listener_out_len = tf.contrib.seq2seq.tile_batch(
                        self.listener_out_len,
                        multiplier=self.network_data.beam_width)
                    tiled_batch_size = self.batch_size * self.network_data.beam_width

                else:
                    tiled_listener_output = self.listener_output
                    tiled_listener_state = self.listener_state
                    tiled_listener_out_len = self.listener_out_len
                    tiled_batch_size = self.batch_size

            self.projection_layer = Dense(self.network_data.num_classes,
                                          use_bias=True)

            with tf.variable_scope("attention", reuse=True):

                tiled_cell, tiled_decoder_initial_state = attention_layer(
                    input=tiled_listener_output,
                    num_layers=self.network_data.attention_num_layers,
                    rnn_units_list=list(
                        map(lambda x: 2 * x,
                            self.network_data.listener_num_units)),
                    rnn_activations_list=self.network_data.
                    attention_activation_list,
                    attention_units=self.network_data.attention_units,
                    lengths=tiled_listener_out_len,
                    batch_size=tiled_batch_size,
                    input_state=tuple(tiled_listener_state),
                    keep_prob=None,
                    train_ph=self.tf_is_traing_pl)

                start_tokens = tf.fill([self.batch_size],
                                       self.network_data.sos_id)

                if self.network_data.beam_width > 0:
                    decoded_ids = beam_search_decoder(
                        input_cell=tiled_cell,
                        embedding=self.embedding,
                        initial_state=tiled_decoder_initial_state,
                        start_token=start_tokens,
                        end_token=self.network_data.eos_id,
                        beam_width=self.network_data.beam_width,
                        output_layer=self.projection_layer,
                        max_iterations=self.max_features_length,
                        name="attention",
                        time_major=False)
                    decoded_ids = decoded_ids[:, :, 0]  # Most probable beam

                else:
                    decoded_ids = greedy_decoder(
                        input_cell=tiled_cell,
                        embedding=self.embedding,
                        initial_state=tiled_decoder_initial_state,
                        start_token=start_tokens,
                        end_token=self.network_data.eos_id,
                        output_layer=self.projection_layer,
                        max_iterations=self.max_features_length,
                        name="attention",
                        time_major=False)

            with tf.name_scope('decoded_ids'):
                self.decoded_ids = tf.identity(decoded_ids, name='decoded_ids')

            with tf.name_scope("loss"):
                kernel_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith(
                            'dense_layer') and 'kernel' in var.name:
                        kernel_loss += tf.nn.l2_loss(var)

                for var in tf.trainable_variables():
                    if var.name.startswith(
                            'listener') and 'kernel' in var.name:
                        kernel_loss += tf.nn.l2_loss(var)

                for var in tf.trainable_variables():
                    if var.name.startswith(
                            'attention') and 'kernel' in var.name:
                        kernel_loss += tf.nn.l2_loss(var)

                target_weights = tf.sequence_mask(self.input_labels_length,
                                                  self.max_label_length,
                                                  dtype=tf.float32,
                                                  name='mask')

                sequence_loss = tf.contrib.seq2seq.sequence_loss(
                    logits=self.logits,
                    targets=self.input_labels,
                    weights=target_weights,
                    average_across_timesteps=True,
                    average_across_batch=True)

                self.loss = sequence_loss + self.network_data.kernel_regularizer * kernel_loss
                tf.summary.scalar('sequence_loss', sequence_loss)
                tf.summary.scalar('loss', self.loss)

            with tf.name_scope("label_error_rate"):
                train_decoded_ids = tf.argmax(tf.nn.softmax(self.logits,
                                                            axis=2),
                                              axis=2)
                self.train_ler = tf.reduce_mean(
                    tf.edit_distance(
                        hypothesis=tf.contrib.layers.dense_to_sparse(
                            tf.cast(train_decoded_ids, tf.int32)),
                        truth=tf.contrib.layers.dense_to_sparse(
                            self.input_labels),
                        normalize=True))
                self.ler = tf.reduce_mean(
                    tf.edit_distance(
                        hypothesis=tf.contrib.layers.dense_to_sparse(
                            tf.cast(self.decoded_ids, tf.int32)),
                        truth=tf.contrib.layers.dense_to_sparse(
                            self.input_labels),
                        normalize=True))
                tf.summary.scalar('label_error_rate', tf.reduce_mean(self.ler))
                tf.summary.scalar('train_label_error_rate',
                                  tf.reduce_mean(self.train_ler))

            with tf.name_scope("training_op"):
                if self.network_data.use_learning_rate_decay:
                    self.learning_rate = tf.train.exponential_decay(
                        self.network_data.learning_rate,
                        self.global_step,
                        decay_steps=self.network_data.
                        learning_rate_decay_steps,
                        decay_rate=self.network_data.learning_rate_decay,
                        staircase=True)
                else:
                    self.learning_rate = self.network_data.learning_rate

                opt = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate,
                    beta1=self.network_data.adam_beta1,
                    beta2=self.network_data.adam_beta2,
                    epsilon=self.network_data.adam_epsilon)

                if self.network_data.clip_norm > 0:
                    grads, vs = zip(*opt.compute_gradients(self.loss))
                    grads, _ = tf.clip_by_global_norm(
                        grads, self.network_data.clip_norm)
                    self.train_op = opt.apply_gradients(
                        zip(grads, vs), global_step=self.global_step)
                else:
                    self.train_op = self.network_data.optimizer.minimize(
                        self.loss)

            self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
            self.merged_summary = tf.summary.merge_all()
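
A hypothetical inference run of the attention graph above might look like this. It assumes net is an instance of this class after create_graph(use_tfrecords=False) and that num_features matches net.network_data.num_features; it feeds a single random utterance and reads back the decoded ids.

import numpy as np
import tensorflow as tf

# Hypothetical usage sketch (dummy data, not from the original source).
with net.graph.as_default():
    init_op = tf.global_variables_initializer()

with tf.Session(graph=net.graph) as sess:
    sess.run(init_op)
    features = np.random.rand(1, 120, num_features).astype(np.float32)  # [batch, time, feat]
    decoded = sess.run(net.decoded_ids,
                       feed_dict={net.input_features: features,
                                  net.input_features_length: [120],
                                  net.tf_is_traing_pl: False})
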
Example 6
def model_fn(features,
             labels,
             mode,
             config,
             params):

    input_features = features['feature']
    input_features_length = features['feat_len']

    subsample_factor = params["num_reduce_by_half"]
    if subsample_factor is not None and subsample_factor > 0:
        for i in range(subsample_factor):
            input_features_length = tf.div(input_features_length, 2) + tf.cast(input_features_length % 2,
                                                                               dtype=tf.int32)
            input_features = input_features[:, ::2]

    if params['noise_stddev'] is not None and params['noise_stddev'] != 0.0:
        input_features = tf.keras.layers.GaussianNoise(stddev=params['noise_stddev'])(inputs=input_features, training=mode == tf.estimator.ModeKeys.TRAIN)

    decoder_inputs = None
    targets = None
    targets_length = None

    global_step = tf.train.get_global_step()

    if mode != tf.estimator.ModeKeys.PREDICT:
        decoder_inputs = labels['targets_inputs']
        targets = labels['targets_outputs']
        targets_length = labels['target_len']

    with tf.name_scope("dense_layer_1"):
        input_features = dense_multilayer(input_ph=input_features,
                                          num_layers=params['num_dense_layers_1'],
                                          num_units=params['num_units_1'],
                                          name='dense_layer_1',
                                          activation_list=params['dense_activations_1'],
                                          use_batch_normalization=params['batch_normalization_1'],
                                          batch_normalization_trainable=params['batch_normalization_trainable_1'],
                                          train_ph=mode == tf.estimator.ModeKeys.TRAIN,
                                          use_tensorboard=True,
                                          keep_prob_list=params['keep_prob_1'],
                                          kernel_initializers=params['kernel_init_1'],
                                          bias_initializers=params['bias_init_1'],
                                          tensorboard_scope='dense_layer_1')

    with tf.variable_scope('listener'):
        listener_output, input_features_length, listener_state = bidirectional_pyramidal_rnn(
            input_ph=input_features,
            seq_len_ph=input_features_length,
            num_layers=params['listener_num_layers'],
            num_units=params['listener_num_units'],
            name="listener",
            activation_list=params['listener_activation_list'],
            use_tensorboard=True,
            tensorboard_scope="listener",
            keep_prob=params['listener_keep_prob_list'],
            train_ph=mode == tf.estimator.ModeKeys.TRAIN)

    with tf.name_scope("dense_layer_2"):
        listener_output = dense_multilayer(input_ph=listener_output,
                                           num_layers=params['num_dense_layers_2'],
                                           num_units=params['num_units_2'],
                                           name='dense_layer_2',
                                           activation_list=params['dense_activations_2'],
                                           use_batch_normalization=params['batch_normalization_2'],
                                           batch_normalization_trainable=params['batch_normalization_trainable_2'],
                                           train_ph=mode == tf.estimator.ModeKeys.TRAIN,
                                           use_tensorboard=True,
                                           keep_prob_list=params['keep_prob_2'],
                                           kernel_initializers=params['kernel_init_2'],
                                           bias_initializers=params['bias_init_2'],
                                           tensorboard_scope='dense_layer_2')

    with tf.variable_scope('tile_batch'):
        batch_size = tf.shape(listener_output)[0]
        if mode == tf.estimator.ModeKeys.PREDICT and params['beam_width'] > 0:
            listener_output = tf.contrib.seq2seq.tile_batch(
                listener_output, multiplier=params['beam_width'])
            input_features_length = tf.contrib.seq2seq.tile_batch(
                input_features_length, multiplier=params['beam_width'])
            listener_state = tf.contrib.seq2seq.tile_batch(
                listener_state, multiplier=params['beam_width'])
            batch_size = batch_size * params['beam_width']

    with tf.variable_scope('attention'):
        attention_cell, attention_state = attention_layer(
            input=listener_output,
            lengths=input_features_length,
            num_layers=params['attention_num_layers'],
            attention_units=params['attention_units'],
            attention_size=params['attention_size'],
            attention_type=params['attention_type'],
            activation=params['attention_activation'],
            keep_prob=params['attention_keep_prob'],
            train_ph=mode == tf.estimator.ModeKeys.TRAIN,
            batch_size=batch_size,
            input_state=None,
            use_tensorboard=True,
            tensorboard_scope='attention_cell'
        )

    with tf.variable_scope('speller'):
        def embedding_fn(ids):
            if params['num_embeddings'] != 0:
                target_embedding = tf.get_variable(
                    name='target_embedding',
                    shape=[params['num_classes'], params['num_embeddings']],
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer())
                return tf.nn.embedding_lookup(target_embedding, ids)
            else:
                return tf.one_hot(ids, params['num_classes'])

        projection_layer = tf.layers.Dense(params['num_classes'], use_bias=True, name='projection_layer')

        maximum_iterations = None
        if mode != tf.estimator.ModeKeys.TRAIN:
            max_source_length = tf.reduce_max(input_features_length)
            maximum_iterations = tf.to_int32(tf.round(tf.to_float(max_source_length) * 2))
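            # Heuristic cap: outside training, decode for at most ~2x the (possibly
            # subsampled) source length so the inference decoder cannot run unbounded.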

        if mode == tf.estimator.ModeKeys.TRAIN:
            decoder_inputs = embedding_fn(decoder_inputs)

            decoder = attention_decoder(
                input_cell=attention_cell,
                initial_state=attention_state,
                embedding_fn=embedding_fn,
                seq_embedding=decoder_inputs,
                seq_embedding_len=targets_length,
                projection_layer=projection_layer,
                sampling_prob=params['sampling_probability'])

        elif mode == tf.estimator.ModeKeys.PREDICT and params['beam_width'] > 0:
            decoder = beam_search_decoder(
                input_cell=attention_cell,
                embedding=embedding_fn,
                start_token=params['sos_id'],
                end_token=params['eos_id'],
                initial_state=attention_state,
                beam_width=params['beam_width'],
                projection_layer=projection_layer,
                batch_size=batch_size)
        else:

            decoder = greedy_decoder(
                inputs=attention_cell,
                embedding=embedding_fn,
                start_token=params['sos_id'],
                end_token=params['eos_id'],
                initial_state=attention_state,
                projection_layer=projection_layer,
                batch_size=batch_size)

        decoder_outputs, final_context_state, final_sequence_length = tf.contrib.seq2seq.dynamic_decode(
            decoder, maximum_iterations=maximum_iterations)

    with tf.name_scope('prediction'):
        if mode == tf.estimator.ModeKeys.PREDICT and params['beam_width'] > 0:
            logits = tf.no_op()
            sample_ids = decoder_outputs.predicted_ids
        else:
            logits = decoder_outputs.rnn_output
            sample_ids = tf.to_int32(tf.argmax(logits, -1))

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'sample_ids': sample_ids}

        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    with tf.name_scope('metrics'):
        ler = edit_distance(
            sample_ids, targets, params['eos_id'], None)  # params.mapping

        metrics = {'ler': tf.metrics.mean(ler)}

    tf.summary.scalar('ler', metrics['ler'][1])

    with tf.name_scope('loss'):
        kernel_loss = 0
        for var in tf.trainable_variables():
            if var.name.startswith('dense_layer') and 'kernel' in var.name:
                kernel_loss += tf.nn.l2_loss(var)

        attn_loss = attention_loss(
            logits=logits,
            targets=targets,
            logits_length=final_sequence_length,
            targets_length=targets_length,
            eos_id=params['eos_id'],
            train_ph=mode == tf.estimator.ModeKeys.TRAIN)

        loss = attn_loss + params['kernel_regularizer'] * kernel_loss

    if mode == tf.estimator.ModeKeys.EVAL:
        def _create_attention_images_summary(context_state):
            """Reference: https://github.com/tensorflow/nmt/blob/master/nmt/attention_model.py"""
            images = (context_state.alignment_history.stack())
            # Reshape to (batch, src_seq_len, tgt_seq_len,1)
            images = tf.expand_dims(tf.transpose(images, [1, 2, 0]), -1)
            # Scale to range [0, 255]
            images -= 1
            images = -images
            images *= 255
            summary = tf.summary.image("attention_images", images)
            return summary
        with tf.name_scope('alignment'):
            attention_summary = _create_attention_images_summary(final_context_state)

        eval_summary_hook = tf.train.SummarySaverHook(
            save_steps=10,
            output_dir=os.path.join(config.model_dir, 'eval'),
            summary_op=attention_summary)

        logging_hook = tf.train.LoggingTensorHook(
            tensors={
                'ler': tf.reduce_mean(ler),
                # 'max_predictions': sample_ids[tf.argmax(ler)],
                # 'max_targets': targets[tf.argmax(ler)],
            },
            every_n_iter=10)

        return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics,
                                          evaluation_hooks=[logging_hook, eval_summary_hook])

    with tf.name_scope('train'):
        if params['use_learning_rate_decay']:
            learning_rate = tf.train.exponential_decay(
                params['learning_rate'],
                global_step,
                decay_steps=params['learning_rate_decay_steps'],
                decay_rate=params['learning_rate_decay'],
                staircase=True)
        else:
            learning_rate = params['learning_rate']

        if params['optimizer'] == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        elif params['optimizer'] == 'momentum' and params['momentum'] is not None:
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=params['momentum'])
        elif params['optimizer'] == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
        else:
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

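        # Force pending UPDATE_OPS (e.g. batch-norm moving averages) to run whenever the loss is evaluated.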
        loss = tf.tuple([loss], control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))[0]
        if params['clip_gradient'] != 0:
            grads = tf.gradients(loss, tf.trainable_variables())
            grads, _ = tf.clip_by_global_norm(grads, params['clip_gradient'])
            grads_and_vars = list(zip(grads, tf.trainable_variables()))
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
        else:
            train_op = optimizer.minimize(loss, global_step=global_step)

    logging_hook = tf.train.LoggingTensorHook(
        tensors={
            'loss': loss,
            'ler': tf.reduce_mean(ler),
            'learning_rate': tf.reduce_mean(learning_rate),
            # 'feal_len': features['feat_len'],
            # 'feal_len2': input_features_length,
            # 'feal_len3': tf.shape(input_features),
        },
        every_n_secs=1)

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op,
                                      training_hooks=[logging_hook], eval_metric_ops=metrics)
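
For context, here is a minimal sketch of how a model_fn like the one above is typically wired into tf.estimator.Estimator. The helper make_estimator, the params values and train_input_fn are assumed placeholders rather than values taken from this example; a real params dict has to supply every key the model_fn reads.

import tensorflow as tf


def make_estimator(model_fn, model_dir, params):
    # Checkpoint and summary cadence for training and evaluation.
    run_config = tf.estimator.RunConfig(model_dir=model_dir,
                                        save_checkpoints_steps=1000,
                                        save_summary_steps=100)
    return tf.estimator.Estimator(model_fn=model_fn,
                                  config=run_config,
                                  params=params)


# Illustrative subset of the hyperparameters read by the model_fn above.
params = {
    'beam_width': 0,
    'sos_id': 1,
    'eos_id': 2,
    'kernel_regularizer': 1e-4,
    'use_learning_rate_decay': False,
    'learning_rate': 1e-3,
    'optimizer': 'adam',
    'clip_gradient': 5.0,
}

# estimator = make_estimator(model_fn, model_dir='/tmp/attention_asr', params=params)
# estimator.train(input_fn=train_input_fn, steps=10000)    # train_input_fn: assumed (features, labels) pipeline
# estimator.evaluate(input_fn=eval_input_fn)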
Example n. 7
    def create_graph(self,
                     use_tfrecords=False,
                     features_tensor=None,
                     labels_tensor=None,
                     features_len_tensor=None):

        with self.graph.as_default():
            self.tf_is_traing_pl = tf.placeholder_with_default(True, shape=(), name='is_training')

            with tf.name_scope("seq_len"):
                if not use_tfrecords:
                    self.input_features_length = tf.placeholder(tf.int32, shape=[None], name="sequence_length")
                else:
                    self.input_features_length = features_len_tensor

            with tf.name_scope("input_features"):
                if not use_tfrecords:
                    self.input_features = tf.placeholder(
                        dtype=tf.float32,
                        shape=[None, None, self.network_data.num_features],
                        name="input")
                else:
                    self.input_features = features_tensor

            with tf.name_scope("input_labels"):
                if not use_tfrecords:
                    self.input_labels = tf.sparse_placeholder(
                        dtype=tf.int32,
                        shape=[None, None],
                        name="input_label")
                else:
                    self.input_labels = labels_tensor

            self.rnn_input = tf.identity(self.input_features)
            with tf.name_scope("dense_layer_1"):
                self.rnn_input = dense_multilayer(input_ph=self.rnn_input,
                                                  num_layers=self.network_data.num_dense_layers_1,
                                                  num_units=self.network_data.num_units_1,
                                                  name='dense_layer_1',
                                                  activation_list=self.network_data.dense_activations_1,
                                                  use_batch_normalization=self.network_data.batch_normalization_1,
                                                  train_ph=self.tf_is_traing_pl,
                                                  use_tensorboard=True,
                                                  keep_prob_list=self.network_data.keep_prob_1,
                                                  kernel_initializers=self.network_data.kernel_init_1,
                                                  bias_initializers=self.network_data.bias_init_1,
                                                  tensorboard_scope='dense_layer_1')

            with tf.name_scope("RNN_cell"):
                if self.network_data.is_bidirectional:
                    self.rnn_outputs = bidirectional_rnn(
                        input_ph=self.rnn_input,
                        seq_len_ph=self.input_features_length,
                        num_layers=len(self.network_data.num_fw_cell_units),
                        num_fw_cell_units=self.network_data.num_fw_cell_units,
                        num_bw_cell_units=self.network_data.num_bw_cell_units,
                        name="RNN_cell",
                        activation_fw_list=self.network_data.cell_fw_activation,
                        activation_bw_list=self.network_data.cell_bw_activation,
                        use_tensorboard=True,
                        tensorboard_scope='RNN',
                        output_size=self.network_data.rnn_output_sizes)

                else:
                    self.rnn_outputs = unidirectional_rnn(
                        input_ph=self.rnn_input,
                        seq_len_ph=self.input_features_length,
                        num_layers=len(self.network_data.num_cell_units),
                        num_cell_units=self.network_data.num_cell_units,
                        name="RNN_cell",
                        activation_list=self.network_data.cell_activation,
                        use_tensorboard=True,
                        tensorboard_scope='RNN',
                        output_size=self.network_data.rnn_output_sizes)

            with tf.name_scope("dense_layer_2"):
                self.rnn_outputs = dense_multilayer(input_ph=self.rnn_outputs,
                                                    num_layers=self.network_data.num_dense_layers_2,
                                                    num_units=self.network_data.num_units_2,
                                                    name='dense_layer_2',
                                                    activation_list=self.network_data.dense_activations_2,
                                                    use_batch_normalization=self.network_data.batch_normalization_2,
                                                    train_ph=self.tf_is_traing_pl,
                                                    use_tensorboard=True,
                                                    keep_prob_list=self.network_data.keep_prob_2,
                                                    kernel_initializers=self.network_data.kernel_init_2,
                                                    bias_initializers=self.network_data.bias_init_2,
                                                    tensorboard_scope='dense_layer_2')

            with tf.name_scope("dense_output"):
                self.dense_output_no_activation = dense_layer(input_ph=self.rnn_outputs,
                                                              num_units=self.network_data.num_classes,
                                                              name='dense_output_no_activation',
                                                              activation=None,
                                                              use_batch_normalization=False,
                                                              train_ph=False,
                                                              use_tensorboard=True,
                                                              keep_prob=1,
                                                              tensorboard_scope='dense_output')

                self.dense_output = tf.nn.softmax(self.dense_output_no_activation, name='dense_output')
                tf.summary.histogram('dense_output', self.dense_output)

            with tf.name_scope("loss"):
                rnn_loss = 0
                for var in tf.trainable_variables():
                    if var.name.startswith('RNN_cell') and 'kernel' in var.name:
                        rnn_loss += tf.nn.l2_loss(var)

                dense_loss = 0
                for var in tf.trainable_variables():
                    # Parentheses matter here: without them 'and' binds tighter than 'or',
                    # so the kernel check would only apply to the 'input_dense_layer' branch.
                    if (var.name.startswith('dense_layer') or
                            var.name.startswith('input_dense_layer')) and \
                            'kernel' in var.name:
                        dense_loss += tf.nn.l2_loss(var)

                loss = tf.nn.ctc_loss(labels=self.input_labels,
                                      inputs=self.dense_output_no_activation,
                                      sequence_length=self.input_features_length,
                                      time_major=False)
                self.logits_loss = tf.reduce_mean(tf.reduce_sum(loss))
                self.loss = self.logits_loss \
                            + self.network_data.rnn_regularizer * rnn_loss \
                            + self.network_data.dense_regularizer * dense_loss
                tf.summary.scalar('loss', self.loss)

            # define the optimizer
            with tf.name_scope("training"):
                self.train_op = self.network_data.optimizer.minimize(self.loss)

            with tf.name_scope("decoder"):
                self.output_time_major = tf.transpose(self.dense_output, (1, 0, 2))
                self.decoded, log_prob = self.network_data.decoder_function(self.output_time_major, self.input_features_length)

            with tf.name_scope("label_error_rate"):
                # Inaccuracy: label error rate
                self.ler = tf.reduce_mean(tf.edit_distance(hypothesis=tf.cast(self.decoded[0], tf.int32),
                                                           truth=self.input_labels,
                                                           normalize=True))
                tf.summary.scalar('label_error_rate', tf.reduce_mean(self.ler))

            self.checkpoint_saver = tf.train.Saver(save_relative_paths=True)
            self.merged_summary = tf.summary.merge_all()
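
            # Usage sketch (assumed names): a graph built here is normally driven from a
            # tf.Session over self.graph, for example:
            #     with tf.Session(graph=self.graph) as sess:
            #         sess.run(tf.global_variables_initializer())
            #         feed = {self.input_features: batch_features,        # [batch, time, num_features] float array
            #                 self.input_features_length: batch_lengths,  # [batch] int array
            #                 self.input_labels: sparse_labels,           # tf.SparseTensorValue for the sparse placeholder
            #                 self.tf_is_traing_pl: True}
            #         _, loss_value, ler_value = sess.run([self.train_op, self.loss, self.ler], feed_dict=feed)
            # batch_features, batch_lengths and sparse_labels are hypothetical inputs, not defined in this example.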