Example #1
    def __call__(self,
                 inputs,
                 seq_length,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the CNN variables and operations to the graph
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

            #input layer
            conv = Conv2dLayer(self.num_units, 3, 1)

            #output layer
            outlayer = FFLayer(self.output_dim,
                               TfActivation(None, lambda x: x), 0)

            time_steps = [inputs]
            num_time_steps = 11

            print inputs[1]

            for i in range(num_time_steps):
                forward = tf.pad(inputs[:, i + 1:, :],
                                 [[0, 0], [0, i + 1], [0, 0]])
                backward = tf.pad(inputs[:, :-i - 1, :],
                                  [[0, 0], [i + 1, 0], [0, 0]])
                time_steps += [forward, backward]
            logits = tf.stack(time_steps, axis=3)

            #apply the input layer
            #logits = tf.expand_dims(inputs, 3)
            for l in range(1, self.num_layers):
                logits = conv(logits, seq_length, is_training,
                              'convlayer' + str(l))
                logits = tf.nn.relu(logits)

            #stack all the output channels for the final layer
            logits = tf.reshape(logits,
                                logits.get_shape().as_list()[0:2] + [-1])

            #convert the logits to nonsequence logits for the output layer
            logits = seq_convertors.seq2nonseq(logits, seq_length)

            logits = outlayer(logits, seq_length, is_training, 'outlayer')

            #convert the logits to sequence logits to match expected output
            seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                                   int(inputs.get_shape()[0]))

            #create a saver
            saver = tf.train.Saver()

            control_ops = None

        return seq_logits, seq_length, saver, control_ops
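The loop above builds a spliced context window: for each offset it shifts the inputs forward and backward along the time axis with tf.pad and collects the shifted copies, which tf.stack then turns into channels. A minimal NumPy sketch of one such shift (shapes and values are illustrative assumptions, not from the source):

#shift-and-pad splicing for a single offset, mirroring the tf.pad calls above
import numpy as np

batch_size, num_frames, feat_dim = 1, 5, 2
inputs = np.arange(batch_size * num_frames * feat_dim,
                   dtype=np.float32).reshape(batch_size, num_frames, feat_dim)

offset = 1
#each position now holds the frame one step in the future (zero-padded at the end)
forward = np.pad(inputs[:, offset:, :],
                 [(0, 0), (0, offset), (0, 0)], mode='constant')
#each position now holds the frame one step in the past (zero-padded at the start)
backward = np.pad(inputs[:, :-offset, :],
                  [(0, 0), (offset, 0), (0, 0)], mode='constant')

#stacking along a new trailing axis gives one channel per shifted copy,
#matching tf.stack(time_steps, axis=3) in the graph above
spliced = np.stack([inputs, forward, backward], axis=3)
print(spliced.shape)  # (1, 5, 2, 3)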
Example #2
    def __call__(self,
                 inputs,
                 input_seq_length,
                 targets=None,
                 target_seq_length=None,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the neural net variables and operations to the graph

        Args:
            inputs: the inputs to the neural network, this is a
                [batch_size x max_input_length x feature_dim] tensor
            input_seq_length: The sequence lengths of the input utterances, this
                is a [batch_size] dimensional vector
            targets: the targets to the neural network, this is a
                [batch_size x max_output_length x 1] tensor. The targets can be
                used during training
            target_seq_length: The sequence lengths of the target utterances,
                this is a [batch_size] dimensional vector
            is_training: whether or not the network is in training mode
            reuse: whether or not the variables in the network should be reused
            scope: the name scope

        Returns:
            A quadruple containing:
                - output logits
                - the output logits sequence lengths as a vector
                - a saver object
                - a dictionary of control operations (may be empty)
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

            #create the input layer
            inlayer = Conv1dlayer(self.num_units, self.kernel_size, 1)

            #create the gated convolutional layers
            dconv = GatedDilatedConvolution(self.kernel_size)

            #create the fully connected layer
            act = activation.TfActivation(None, tf.nn.relu)
            fflayer = FFLayer(self.num_units, act)

            #create the output layer
            act = activation.TfActivation(None, lambda x: x)
            outlayer = FFLayer(self.output_dim, act)

            #apply the input layer
            logits = 0
            forward = inlayer(inputs, is_training, reuse, 'inlayer')

            #apply the blocks of dilated convolution layers
            for b in range(self.num_blocks):
                for l in range(self.num_layers):
                    forward, highway = dconv(forward, 2**l, is_training, reuse,
                                             'dconv%d-%d' % (b, l))
                    logits += highway

            #go to nonsequential data
            logits = seq_convertors.seq2nonseq(logits, input_seq_length)

            #apply the relu
            logits = tf.nn.relu(logits)

            #apply the fully connected layer
            logits = fflayer(logits, is_training, reuse, scope='FFlayer')

            #apply the output layer
            logits = outlayer(logits, is_training, reuse, scope='outlayer')

            #go back to sequential data
            logits = seq_convertors.nonseq2seq(logits, input_seq_length,
                                               int(inputs.get_shape()[1]))

            #create a saver
            saver = tf.train.Saver()

        return logits, input_seq_length, saver, None
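Both of the examples above rely on seq_convertors.seq2nonseq and seq_convertors.nonseq2seq to strip and restore the per-utterance padding around the frames. The library's own implementation is not shown here; the NumPy sketch below is only an assumed reading of the semantics implied by how the functions are used, working on a time-major array (some of the later examples instead pass a list of unstacked time steps).

import numpy as np

def seq2nonseq(sequences, seq_lengths):
    #sequences: [max_length, batch_size, dim]; keep only the unpadded frames
    return np.concatenate(
        [sequences[:length, b] for b, length in enumerate(seq_lengths)],
        axis=0)

def nonseq2seq(frames, seq_lengths, max_length):
    #inverse operation: scatter the flat frames back into padded sequences
    dim = frames.shape[1]
    out = np.zeros((max_length, len(seq_lengths), dim), dtype=frames.dtype)
    start = 0
    for b, length in enumerate(seq_lengths):
        out[:length, b] = frames[start:start + length]
        start += length
    return out

sequences = np.random.randn(5, 2, 3).astype(np.float32)
seq_lengths = [5, 3]
frames = seq2nonseq(sequences, seq_lengths)    #shape (8, 3)
restored = nonseq2seq(frames, seq_lengths, 5)  #padding frames are zeroed again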
Example #3
    def train_NN(self, config, train_important_information,
                 valid_important_information):

        ##########################
        ### DATASET
        ##########################

        train_data_dir = config.get('directories',
                                    'exp_dir') + '/train_features_dir'
        valid_data_dir = config.get('directories',
                                    'exp_dir') + '/valid_features_dir'
        NN_dir = config.get('directories', 'exp_dir') + '/NN_train_dir'

        if not os.path.isdir(NN_dir):
            os.mkdir(NN_dir)

        logdir = NN_dir + '/logdir'

        if not os.path.isdir(logdir):
            os.mkdir(logdir)

        #########################
        ### SETTINGS
        ##########################

        # Hyperparameters
        initial_learning_rate = float(
            config.get('simple_NN', 'initial_learning_rate'))
        decay_steps = int(config.get('simple_NN', 'decay_steps'))
        decay_rate = float(config.get('simple_NN', 'decay_rate'))

        # Architecture
        n_hidden = int(config.get('simple_NN', 'n_hidden'))
        hid_layer_num = int(config.get('simple_NN', 'hidden_layer_num'))
        n_input = train_important_information['input_dim']
        training_epochs = int(config.get('simple_NN', 'training_epochs'))
        batch_size = int(config.get('simple_NN', 'train_batch_size'))
        valid_batch_total = valid_important_information['valid_batch_total']
        n_classes = train_important_information['num_labels']
        training_batch_total = train_important_information[
            'training_batch_total']
        max_input_length = train_important_information['train_utt_max_length']
        max_target_length = train_important_information[
            'train_label_max_length']

        ##########################
        ### GRAPH DEFINITION
        ##########################

        g = tf.Graph()
        with g.as_default():

            with tf.name_scope('input'):

                #create the inputs placeholder
                inputs = tf.placeholder(
                    tf.float32,
                    shape=[max_input_length, batch_size, n_input],
                    name='features')

                #the length of all the input sequences
                input_seq_length = tf.placeholder(tf.int32,
                                                  shape=[batch_size],
                                                  name='input_seq_length')

                #split the 3D input tensor in a list of batch_size*input_dim tensors
                split_inputs = tf.unstack(inputs,
                                          name='split_inputs_training_op')

                #convert the sequential data to non sequential data
                nonseq_inputs = seq_convertors.seq2nonseq(
                    split_inputs, input_seq_length, name='inputs-processing')

            with tf.name_scope('target'):

                #reference labels
                targets = tf.placeholder(
                    tf.int32,
                    shape=[max_target_length, batch_size, 1],
                    name='targets')

                #the length of all the output sequences
                target_seq_length = tf.placeholder(tf.int32,
                                                   shape=[batch_size],
                                                   name='output_seq_length')

            # Model parameters
            with tf.name_scope("weights"):

                weights = {
                    'h' + str(i):
                    tf.Variable(tf.truncated_normal([n_hidden, n_hidden],
                                                    stddev=0.1),
                                name="h" + str(i) + "_value")
                    for i in range(2, hid_layer_num + 1)
                }
                weights['h1'] = tf.Variable(tf.truncated_normal(
                    [n_input, n_hidden], stddev=0.1),
                                            name="h1_value")
                weights['out'] = tf.Variable(tf.truncated_normal(
                    [n_hidden, n_classes], stddev=0.1),
                                             name="weight_out_value")

            with tf.name_scope("biases"):

                biases = {
                    'b' + str(i): tf.Variable(tf.zeros([n_hidden]),
                                              name="b" + str(i) + "_value")
                    for i in range(1, hid_layer_num + 1)
                }
                biases['out'] = tf.Variable(tf.zeros([n_classes]),
                                            name="bias_out_value")

            # Multilayer perceptron

            with tf.name_scope("layer-1"):

                layer_1 = tf.add(tf.matmul(nonseq_inputs, weights['h1']),
                                 biases['b1'])

                layer_out = tf.nn.tanh(layer_1)

            for i in range(2, hid_layer_num + 1):

                with tf.name_scope("layer-" + str(i)):

                    layer = tf.add(tf.matmul(layer_out, weights['h' + str(i)]),
                                   biases['b' + str(i)])

                    layer_out = tf.nn.tanh(layer)

                    print "hidden layer " + str(i)

            with tf.name_scope("hid_out"):

                nonseq_logits = tf.add(tf.matmul(layer_out, weights['out']),
                                       biases['out'])

            with tf.name_scope("targets-processing"):

                #split the 3D targets tensor in a list of batch_size*1 tensors
                split_targets = tf.unstack(targets)

                nonseq_targets = seq_convertors.seq2nonseq(
                    split_targets,
                    target_seq_length,
                    name="targets-processing")
                #make a vector out of the targets
                nonseq_targets = tf.reshape(nonseq_targets, [-1])

                #one hot encode the targets
                #pylint: disable=E1101
                end_nonseq_targets = tf.one_hot(
                    nonseq_targets, int(nonseq_logits.get_shape()[1]))

            with tf.name_scope('soft_max'):

                # Loss and optimizer
                loss = tf.nn.softmax_cross_entropy_with_logits(
                    logits=nonseq_logits, labels=end_nonseq_targets)
                cost = tf.reduce_mean(loss, name='cost_op')

            with tf.name_scope('train'):

                global_step = tf.Variable(0, trainable=False)
                learning_rate = tf.train.exponential_decay(
                    initial_learning_rate,
                    global_step,
                    decay_steps,
                    decay_rate,
                    staircase=True)
                #optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
                optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
                train = optimizer.minimize(cost,
                                           global_step=global_step,
                                           name='train_op')

            with tf.name_scope('Accuracy'):

                # Prediction
                correct_prediction = tf.equal(tf.argmax(end_nonseq_targets, 1),
                                              tf.argmax(nonseq_logits, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                  tf.float32),
                                          name='accuracy_op')
                accuracy_valid = tf.reduce_mean(tf.cast(
                    correct_prediction, tf.float32),
                                                name='valid-accuracy_op')

            #create a summary for our cost and accuracy
            tf.summary.scalar("cost", cost)
            tf.summary.scalar("train-accuracy", accuracy)
            tf.summary.scalar("valid-accuracy", accuracy_valid)
            tf.summary.histogram('histogram-train-accuracy', accuracy)
            tf.summary.histogram('histogram-valid-accuracy', accuracy_valid)

            # merge all summaries into a single "operation" which we can execute in a session
            summary_op = tf.summary.merge_all()

            saver = tf.train.Saver(max_to_keep=10000)

        ##########################
        ### TRAINING & EVALUATION
        ##########################

        config = tf.ConfigProto()
        #config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.95

        with tf.Session(graph=g, config=config) as sess:

            with g.as_default():
                sess.run(tf.global_variables_initializer())

            # create log writer object
            writer = tf.summary.FileWriter(logdir, graph=g)

            for epoch in range(training_epochs):

                avg_cost = 0.

                for i in range(training_batch_total):

                    train_batch_x = np.load(
                        gzip.GzipFile(
                            train_data_dir + '/batch_inputs_' + str(i) +
                            '.npy.gz', "r"))
                    train_batch_y = np.load(
                        gzip.GzipFile(
                            train_data_dir + '/batch_targets_' + str(i) +
                            '.npy.gz', "r"))
                    train_input_seq_length = np.load(
                        gzip.GzipFile(
                            train_data_dir + '/batch_input_seq_length_' +
                            str(i) + '.npy.gz', "r"))
                    train_target_seq_length = np.load(
                        gzip.GzipFile(
                            train_data_dir + '/batch_output_seq_length_' +
                            str(i) + '.npy.gz', "r"))

                    # perform the operations we defined earlier on batch
                    _, c, summary = sess.run(
                        [train, cost, summary_op],
                        feed_dict={
                            inputs: train_batch_x,
                            targets: train_batch_y,
                            input_seq_length: train_input_seq_length,
                            target_seq_length: train_target_seq_length
                        })
                    avg_cost += c
                    # write log
                    writer.add_summary(summary,
                                       epoch * training_batch_total + i)

                train_acc = 0
                for j in range(training_batch_total):

                    train_x = np.load(
                        gzip.GzipFile(
                            train_data_dir + '/batch_inputs_' + str(j) +
                            '.npy.gz', "r"))
                    train_y = np.load(
                        gzip.GzipFile(
                            train_data_dir + '/batch_targets_' + str(j) +
                            '.npy.gz', "r"))
                    train_x_seq_length = np.load(
                        gzip.GzipFile(
                            train_data_dir + '/batch_input_seq_length_' +
                            str(j) + '.npy.gz', "r"))
                    train_y_seq_length = np.load(
                        gzip.GzipFile(
                            train_data_dir + '/batch_output_seq_length_' +
                            str(j) + '.npy.gz', "r"))

                    train_batch_acc = sess.run(accuracy,
                                               feed_dict={
                                                   inputs:
                                                   train_x,
                                                   targets:
                                                   train_y,
                                                   input_seq_length:
                                                   train_x_seq_length,
                                                   target_seq_length:
                                                   train_y_seq_length
                                               })

                    train_acc += train_batch_acc
                    print "batch accuracy " + str(j)

                train_acc /= (training_batch_total)

                valid_acc = 0
                for j in range(valid_batch_total):

                    validation_x = np.load(
                        gzip.GzipFile(
                            valid_data_dir + '/batch_inputs_' + str(j) +
                            '.npy.gz', "r"))
                    validation_y = np.load(
                        gzip.GzipFile(
                            valid_data_dir + '/batch_targets_' + str(j) +
                            '.npy.gz', "r"))
                    validation_x_seq_length = np.load(
                        gzip.GzipFile(
                            valid_data_dir + '/batch_input_seq_length_' +
                            str(j) + '.npy.gz', "r"))
                    validation_y_seq_length = np.load(
                        gzip.GzipFile(
                            valid_data_dir + '/batch_output_seq_length_' +
                            str(j) + '.npy.gz', "r"))

                    validation_batch_acc = sess.run(
                        accuracy_valid,
                        feed_dict={
                            inputs: validation_x,
                            targets: validation_y,
                            input_seq_length: validation_x_seq_length,
                            target_seq_length: validation_y_seq_length
                        })

                    valid_acc += validation_batch_acc

                valid_acc /= valid_batch_total

                #print("Epoch: %03d | AvgCost: %.3f" % (epoch + 1, avg_cost / (i + 1)), end="")
                #print(" | Train/Valid ACC: %.3f/%.3f" % (train_acc, valid_acc))

                accuracy_log_file = open(logdir + '/accuracy_log', "a")
                accuracy_log_file.write("Epoch: %03d | AvgCost: %.3f" %
                                        (epoch + 1, avg_cost / (i + 1)))
                accuracy_log_file.write(" | Train/Valid ACC: %.3f/%.3f" %
                                        (train_acc, valid_acc) + '\n')
                accuracy_log_file.close()

                saver.save(sess, NN_dir + '/model.ckpt', global_step=epoch + 1)
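Every pass over the data above reloads four gzip-compressed .npy files with the same boilerplate. A small helper along the lines of the sketch below (hypothetical, not part of the source) would keep the loops shorter; it assumes the directory layout and file naming scheme used above.

import gzip

import numpy as np

def load_batch(data_dir, batch_index):
    '''load one batch of inputs, targets and sequence lengths'''
    def _load(prefix):
        path = data_dir + '/' + prefix + '_' + str(batch_index) + '.npy.gz'
        with gzip.GzipFile(path, 'r') as gz_file:
            return np.load(gz_file)
    return (_load('batch_inputs'), _load('batch_targets'),
            _load('batch_input_seq_length'), _load('batch_output_seq_length'))

#usage inside the training loop:
#train_batch_x, train_batch_y, train_input_seq_length, \
#    train_target_seq_length = load_batch(train_data_dir, i)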
Example #4
    def decode_data(self, writer):

        self.retrieved_data()

        ##########################
        ### GRAPH DEFINITION
        ##########################

        g = tf.Graph()
        with g.as_default():

            decode_inputs = tf.placeholder(
                tf.float32,
                shape=[self.max_length, self.input_dim],
                name='decode_inputs')

            decode_seq_length = tf.placeholder(tf.int32,
                                               shape=[1],
                                               name='decode_seq_length')

            split_inputs = tf.unstack(tf.expand_dims(decode_inputs, 1),
                                      name="decode_split_inputs_op")

            nonseq_inputs = seq_convertors.seq2nonseq(split_inputs,
                                                      decode_seq_length)

            # Multilayer perceptron
            layer_1 = tf.add(tf.matmul(nonseq_inputs, self.weights_h1),
                             self.bias_b1)
            layer_1 = tf.nn.tanh(layer_1)

            layer_2 = tf.add(tf.matmul(layer_1, self.weights_h2), self.bias_b2)
            layer_2 = tf.nn.tanh(layer_2)

            logits = tf.add(tf.matmul(layer_2, self.weights_out),
                            self.bias_out,
                            name="logits_op")

            seq_logits = seq_convertors.nonseq2seq(logits, decode_seq_length,
                                                   len(split_inputs))

            decode_logits = seq_convertors.seq2nonseq(seq_logits,
                                                      decode_seq_length)

            outputs = tf.nn.softmax(decode_logits, name="final_operation")

        ##########################
        ###      EVALUATION
        ##########################

        config = tf.ConfigProto()
        #config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.9

        with tf.Session(graph=g, config=config) as sess:

            #with tf.Session(graph=g) as sess:

            with g.as_default():
                sess.run(tf.global_variables_initializer())

            for i in range(self.total_uttarences):

                utt_id = self.utt_id_list[i]

                utt_mat = self.utt_dict[utt_id]

                input_seq_length = [utt_mat.shape[0]]
                #pad the inputs
                utt_mat = np.append(
                    utt_mat,
                    np.zeros(
                        [self.max_length - utt_mat.shape[0],
                         utt_mat.shape[1]]), 0)

                outputs_value = sess.run('final_operation:0',
                                         feed_dict={
                                             'decode_inputs:0':
                                             utt_mat,
                                             'decode_seq_length:0':
                                             input_seq_length
                                         })

                # print (outputs_value.shape)
                # print (type(outputs_value))

                #get state likelihoods by dividing by the prior
                output = outputs_value / self.prior

                #floor the values to avoid problems with log
                output = np.where(output == 0, np.finfo(float).eps, output)

                # print (output.shape)
                # print (type(output))

                #write the pseudo-likelihoods in kaldi feature format
                writer.write_next_utt(utt_id, np.log(output))

        #close the writer
        writer.close()
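The last lines above turn the network posteriors into the pseudo log-likelihoods that Kaldi expects: divide by the state prior, floor zeros, and take the log (the result of np.where has to be assigned back for the flooring to take effect, as done here and in Example #8). The helper below is only a sketch of that computation; the function name, and reading self.prior as a per-state prior estimated from the training alignments, are assumptions.

import numpy as np

def posteriors_to_log_likelihoods(posteriors, prior):
    '''convert network posteriors to pseudo log-likelihoods'''
    #divide by the state prior (Bayes' rule up to a constant factor)
    likelihoods = posteriors / prior
    #floor zeros so the logarithm stays finite
    likelihoods = np.where(likelihoods == 0, np.finfo(float).eps, likelihoods)
    return np.log(likelihoods)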
Example #5
    def __call__(self,
                 inputs,
                 seq_length,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the DNN variables and operations to the graph

        Args:
            inputs: the inputs to the neural network, this is a list containing
                a [batch_size, input_dim] tensor for each time step
            seq_length: The sequence lengths of the input utterances, if None
                the maximal sequence length will be taken
            is_training: whether or not the network is in training mode
            reuse: whether or not the variables in the network should be reused
            scope: the name scope

        Returns:
            A quadruple containing:
                - output logits
                - the output logits sequence lengths as a vector
                - a saver object
                - a dictionary of control operations:
                    -add: add a layer to the network
                    -init: initialise the final layer
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

            #input layer
            layer = FFLayer(self.num_units, self.activation)

            #output layer
            outlayer = FFLayer(self.output_dim,
                               TfActivation(None, lambda x: x), 0)

            #do the forward computation

            #convert the sequential data to non sequential data
            nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

            activations = [None] * self.num_layers
            activations[0] = layer(nonseq_inputs, is_training, reuse, 'layer0')
            for l in range(1, self.num_layers):
                activations[l] = layer(activations[l - 1], is_training, reuse,
                                       'layer' + str(l))

            if self.layerwise_init:

                #variable that determines how many layers are initialised
                #in the neural net
                initialisedlayers = tf.get_variable(
                    'initialisedlayers', [],
                    initializer=tf.constant_initializer(0),
                    trainable=False,
                    dtype=tf.int32)

                #operation to increment the number of layers
                add_layer_op = initialisedlayers.assign(initialisedlayers +
                                                        1).op

                #compute the logits by selecting the activations at the layer
                #that has last been added to the network, this is used for layer
                #by layer initialisation
                logits = tf.case([(tf.equal(initialisedlayers, tf.constant(l)),
                                   Callable(activations[l]))
                                  for l in range(len(activations))],
                                 default=Callable(activations[-1]),
                                 exclusive=True,
                                 name='layerSelector')

                logits.set_shape([None, self.num_units])
            else:
                logits = activations[-1]

            logits = outlayer(logits, is_training, reuse,
                              'layer' + str(self.num_layers))

            if self.layerwise_init:
                #operation to initialise the final layer
                init_last_layer_op = tf.initialize_variables(
                    tf.get_collection(tf.GraphKeys.VARIABLES,
                                      scope=(tf.get_variable_scope().name +
                                             '/layer' + str(self.num_layers))))

                control_ops = {'add': add_layer_op, 'init': init_last_layer_op}
            else:
                control_ops = None

            #convert the logits to sequence logits to match expected output
            seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                                   len(inputs))

            #create a saver
            saver = tf.train.Saver()

        return seq_logits, seq_length, saver, control_ops
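tf.case expects (predicate, callable) pairs in which each callable takes no arguments and returns a tensor, which is why the activations are wrapped in Callable before being handed to the layer selector. The repository's wrapper is not shown in this example; a minimal sketch of what it presumably looks like:

class Callable(object):
    '''wraps a tensor in a parameterless callable, as required by tf.case'''

    def __init__(self, tensor):
        self.tensor = tensor

    def __call__(self):
        return self.tensor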
Example #6
    def train_NN(self, config, train_important_information,
                 valid_important_information):

        ##########################
        ### DATASET
        ##########################

        train_data_dir = config.get('directories',
                                    'exp_dir') + '/train_features_dir'
        valid_data_dir = config.get('directories',
                                    'exp_dir') + '/valid_features_dir'
        NN_dir = config.get('directories',
                            'exp_dir') + '/NN_train_dir_combined_acc_loss'

        if not os.path.isdir(NN_dir):
            os.mkdir(NN_dir)

        logdir = NN_dir + '/logdir'

        if not os.path.isdir(logdir):
            os.mkdir(logdir)

        #########################
        ### SETTINGS
        ##########################

        # Hyperparameters
        changed_learning_rate = float(
            config.get('simple_NN', 'initial_learning_rate'))
        #decay_steps = int(config.get('simple_NN', 'decay_steps')) # we are using num_steps instead of this
        decay_rate = float(config.get('simple_NN', 'decay_rate'))

        # Architecture
        n_hidden = int(config.get('simple_NN', 'n_hidden'))
        hid_layer_num = int(config.get('simple_NN', 'hidden_layer_num'))
        n_input = train_important_information['input_dim']
        training_epochs = int(config.get('simple_NN', 'training_epochs'))
        batch_size = int(config.get('simple_NN', 'train_batch_size'))
        valid_batch_total = valid_important_information['valid_batch_total']
        n_classes = train_important_information['num_labels']
        training_batch_total = train_important_information[
            'training_batch_total']
        max_input_length = train_important_information['train_utt_max_length']
        max_target_length = train_important_information[
            'train_label_max_length']

        num_steps = training_epochs * training_batch_total
        valid_frequency = training_batch_total  #i.e. validate once per epoch
        total_number_of_retries = 3

        ##########################
        ### GRAPH DEFINITION
        ##########################

        g = tf.Graph()
        with g.as_default():

            #placeholders used to halve the learning rate when validation degrades
            initial_learning_rate = tf.placeholder(tf.float32,
                                                   None,
                                                   name='initial_l_rate')
            learning_rate_factor = tf.placeholder(tf.float32,
                                                  None,
                                                  name='factor_value')

            with tf.name_scope('input'):

                #create the inputs placeholder
                inputs = tf.placeholder(
                    tf.float32,
                    shape=[max_input_length, batch_size, n_input],
                    name='features')

                #the length of all the input sequences
                input_seq_length = tf.placeholder(tf.int32,
                                                  shape=[batch_size],
                                                  name='input_seq_length')

                #split the 3D input tensor in a list of batch_size*input_dim tensors
                split_inputs = tf.unstack(inputs,
                                          name='split_inputs_training_op')

                #convert the sequential data to non sequential data
                nonseq_inputs = seq_convertors.seq2nonseq(
                    split_inputs, input_seq_length, name='inputs-processing')

            with tf.name_scope('target'):

                #reference labels
                targets = tf.placeholder(
                    tf.int32,
                    shape=[max_target_length, batch_size, 1],
                    name='targets')

                #the length of all the output sequences
                target_seq_length = tf.placeholder(tf.int32,
                                                   shape=[batch_size],
                                                   name='output_seq_length')

            # Model parameters
            with tf.name_scope("weights"):

                weights = {
                    'h' + str(i):
                    tf.Variable(tf.truncated_normal([n_hidden, n_hidden],
                                                    stddev=0.1),
                                name="h" + str(i) + "_value")
                    for i in range(2, hid_layer_num + 1)
                }
                weights['h1'] = tf.Variable(tf.truncated_normal(
                    [n_input, n_hidden], stddev=0.1),
                                            name="h1_value")
                weights['out'] = tf.Variable(tf.truncated_normal(
                    [n_hidden, n_classes], stddev=0.1),
                                             name="weight_out_value")

            with tf.name_scope("biases"):

                biases = {
                    'b' + str(i): tf.Variable(tf.zeros([n_hidden]),
                                              name="b" + str(i) + "_value")
                    for i in range(1, hid_layer_num + 1)
                }
                biases['out'] = tf.Variable(tf.zeros([n_classes]),
                                            name="bias_out_value")

            # Multilayer perceptron

            with tf.name_scope("layer-1"):

                layer_1 = tf.add(tf.matmul(nonseq_inputs, weights['h1']),
                                 biases['b1'])

                layer_out = tf.nn.tanh(layer_1)

            for i in range(2, hid_layer_num + 1):

                with tf.name_scope("layer-" + str(i)):

                    layer = tf.add(tf.matmul(layer_out, weights['h' + str(i)]),
                                   biases['b' + str(i)])

                    layer_out = tf.nn.tanh(layer)

            with tf.name_scope("hid_out"):

                nonseq_logits = tf.add(tf.matmul(layer_out, weights['out']),
                                       biases['out'])

            with tf.name_scope("targets-processing"):

                #split the 3D targets tensor in a list of batch_size*1 tensors
                split_targets = tf.unstack(targets)

                nonseq_targets = seq_convertors.seq2nonseq(
                    split_targets,
                    target_seq_length,
                    name="targets-processing")
                #make a vector out of the targets
                nonseq_targets = tf.reshape(nonseq_targets, [-1])

                #one hot encode the targets
                #pylint: disable=E1101
                end_nonseq_targets = tf.one_hot(
                    nonseq_targets, int(nonseq_logits.get_shape()[1]))

            with tf.name_scope('soft_max'):

                # Loss and optimizer
                validation_loss = tf.nn.softmax_cross_entropy_with_logits(
                    logits=nonseq_logits, labels=end_nonseq_targets)
                validation_cost = tf.reduce_mean(validation_loss,
                                                 name='validation_cost_op')

                train_loss = tf.nn.softmax_cross_entropy_with_logits(
                    logits=nonseq_logits, labels=end_nonseq_targets)
                train_cost = tf.reduce_mean(train_loss, name='train_cost_op')

            with tf.name_scope('train'):

                global_step = tf.Variable(0, trainable=False)
                learning_rate = tf.train.exponential_decay(
                    initial_learning_rate,
                    global_step,
                    num_steps,
                    decay_rate,
                    staircase=True) * learning_rate_factor
                #optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
                optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
                train = optimizer.minimize(train_cost,
                                           global_step=global_step,
                                           name='train_op')

            with tf.name_scope('Accuracy'):

                # Prediction
                correct_prediction = tf.equal(tf.argmax(end_nonseq_targets, 1),
                                              tf.argmax(nonseq_logits, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                  tf.float32),
                                          name='accuracy_op')
                accuracy_valid = tf.reduce_mean(tf.cast(
                    correct_prediction, tf.float32),
                                                name='valid-accuracy_op')

            #create a summary for our cost and accuracy
            tf.summary.scalar("Train Loss", train_cost)
            tf.summary.scalar("Validation Loss", validation_cost)
            tf.summary.scalar("Training Accuracy", accuracy)
            tf.summary.scalar("Validation Accuracy", accuracy_valid)

            # merge all summaries into a single "operation" which we can execute in a session
            summary_op = tf.summary.merge_all()

            saver = tf.train.Saver(max_to_keep=10000)

        ##########################
        ### TRAINING & EVALUATION
        ##########################

        config = tf.ConfigProto()
        #config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.9

        with tf.Session(graph=g, config=config) as sess:

            #with tf.Session(graph=g) as sess:
            with g.as_default():
                sess.run(tf.global_variables_initializer())
            # create log writer object
            writer = tf.summary.FileWriter(logdir, graph=g)
            step = 0
            epoch = 0

            validation_loss = 100
            validation_accuracy = 0

            print "First validation loss: " + str(validation_loss)
            print "First validation accuracy: " + str(
                validation_accuracy) + "\n"
            validation_step = step
            num_retries = 0

            train_batch_number = 1
            train_file = 0
            factor = 1.0
            train_acc = 0

            while step < num_steps:

                train_batch_x = np.load(
                    gzip.GzipFile(
                        train_data_dir + '/batch_inputs_' + str(train_file) +
                        '.npy.gz', "r"))
                train_batch_y = np.load(
                    gzip.GzipFile(
                        train_data_dir + '/batch_targets_' + str(train_file) +
                        '.npy.gz', "r"))
                train_input_seq_length = np.load(
                    gzip.GzipFile(
                        train_data_dir + '/batch_input_seq_length_' +
                        str(train_file) + '.npy.gz', "r"))
                train_target_seq_length = np.load(
                    gzip.GzipFile(
                        train_data_dir + '/batch_output_seq_length_' +
                        str(train_file) + '.npy.gz', "r"))

                learning_rate_value, _, loss, train_batch_acc, summary = sess.run(
                    [learning_rate, train, train_cost, accuracy, summary_op],
                    feed_dict={
                        inputs: train_batch_x,
                        targets: train_batch_y,
                        input_seq_length: train_input_seq_length,
                        target_seq_length: train_target_seq_length,
                        learning_rate_factor: factor,
                        initial_learning_rate: changed_learning_rate
                    })

                changed_learning_rate = learning_rate_value
                train_acc += train_batch_acc

                if factor == 0.5:
                    factor = 1.0

                #print "Step number: "+ str(step+1) + " Training Batch Number: "+ str(train_file+1)+" Learning Rate: " + str(learning_rate_value)

                train_batch_number = train_batch_number + 1
                train_file = (train_batch_number % training_batch_total) - 1
                if train_file == -1:
                    train_file = training_batch_total - 1

                # write log to display in tensorboard
                writer.add_summary(summary, train_batch_number)

                step = step + 1

                if step % valid_frequency == 0:

                    epoch = train_batch_number / training_batch_total

                    sum_batch_current_loss = 0
                    valid_acc = 0
                    for valid_file in range(valid_batch_total):

                        validation_x = np.load(
                            gzip.GzipFile(
                                valid_data_dir + '/batch_inputs_' +
                                str(valid_file) + '.npy.gz', "r"))
                        validation_y = np.load(
                            gzip.GzipFile(
                                valid_data_dir + '/batch_targets_' +
                                str(valid_file) + '.npy.gz', "r"))
                        validation_x_seq_length = np.load(
                            gzip.GzipFile(
                                valid_data_dir + '/batch_input_seq_length_' +
                                str(valid_file) + '.npy.gz', "r"))
                        validation_y_seq_length = np.load(
                            gzip.GzipFile(
                                valid_data_dir + '/batch_output_seq_length_' +
                                str(valid_file) + '.npy.gz', "r"))

                        loss, validation_batch_acc, summary = sess.run(
                            [validation_cost, accuracy_valid, summary_op],
                            feed_dict={
                                inputs: validation_x,
                                targets: validation_y,
                                input_seq_length: validation_x_seq_length,
                                target_seq_length: validation_y_seq_length
                            })
                        sum_batch_current_loss += loss
                        valid_acc += validation_batch_acc
                    current_loss = sum_batch_current_loss / valid_batch_total
                    valid_acc /= valid_batch_total
                    #only compare accuracies rounded to 3 decimal places
                    current_validation_accuracy = float(
                        format(valid_acc, '.3f'))

                    train_acc /= (training_batch_total)

                    # writing accuracy information in a log file
                    accuracy_log_file = open(logdir + '/accuracy_log', "a")
                    print "\nEpoch: %03d Train/Valid Accuracy: %.3f/%.3f\n" % (
                        epoch, train_acc, valid_acc)
                    accuracy_log_file.write(
                        "Epoch: %03d | Learning Rate: %f | Train/Valid ACC: %.3f/%.3f"
                        % (epoch, learning_rate_value, train_acc, valid_acc) +
                        "\n")
                    accuracy_log_file.close()

                    train_acc = 0

                    if current_loss >= validation_loss or current_validation_accuracy <= validation_accuracy:

                        print "Make learning rate half, Current_loss: " + str(
                            current_loss) + " Validation_loss: " + str(
                                validation_loss)
                        print "Epoch: " + str(epoch) + " Step number: " + str(
                            step + 1) + " Training Batch Number: " + str(
                                train_file + 1) + " New Learning Rate: " + str(
                                    learning_rate_value * .5)

                        factor = 0.5

                        step = validation_step
                        validation_accuracy = current_validation_accuracy
                        num_retries = num_retries + 1
                        print "Number of Retries: " + str(num_retries) + "\n"

                        if num_retries == total_number_of_retries:

                            saver.save(sess,
                                       NN_dir + '/model.ckpt',
                                       global_step=train_batch_number - 1)
                            save_batch_file = open(
                                NN_dir + '/save_batch_number', "w")
                            save_batch_file.write(str(train_batch_number - 1))
                            save_batch_file.close()

                            print "Number of retries reaches maximum, finishing training the model"
                            break

                        continue

                    else:
                        print "Keep learning rate same, Current_loss: " + str(
                            current_loss) + " Validation_loss: " + str(
                                validation_loss)
                        print "Epoch: " + str(epoch) + " Step number: " + str(
                            step + 1) + " Training Batch Number: " + str(
                                train_file + 1) + " New Learning Rate: " + str(
                                    learning_rate_value) + "\n"
                        factor = 1.0
                        validation_loss = current_loss
                        validation_accuracy = current_validation_accuracy
                        validation_step = step

                        num_retries = 0

                if step == num_steps:
                    saver.save(sess,
                               NN_dir + '/model.ckpt',
                               global_step=train_batch_number - 1)
                    save_batch_file = open(NN_dir + '/save_batch_number', "w")
                    save_batch_file.write(str(train_batch_number - 1))
                    save_batch_file.close()
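The validation block above implements a halve-and-retry schedule: whenever the validation loss or the (rounded) validation accuracy stops improving, the learning-rate factor is set to 0.5, the step counter is rolled back to the last successful validation point, and training stops after three such retries. A condensed, hypothetical restatement of that decision logic:

def update_schedule(current_loss, current_acc, state, max_retries=3):
    '''returns False when training should stop'''
    if current_loss >= state['best_loss'] or current_acc <= state['best_acc']:
        state['factor'] = 0.5               #halve the rate on the next step
        state['step'] = state['best_step']  #roll back to the last good point
        state['best_acc'] = current_acc
        state['retries'] += 1
        return state['retries'] < max_retries
    state['factor'] = 1.0
    state['best_loss'] = current_loss
    state['best_acc'] = current_acc
    state['best_step'] = state['step']
    state['retries'] = 0
    return True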
Example #7
    def __call__(self,
                 inputs,
                 input_seq_length,
                 targets=None,
                 target_seq_length=None,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the neural net variables and operations to the graph

        Args:
            inputs: the inputs to the neural network, this is a
                [batch_size x max_input_length x feature_dim] tensor
            input_seq_length: The sequence lengths of the input utterances, this
                is a [batch_size] dimensional vector
            targets: the targets to the neural network, this is a
                [batch_size x max_output_length x 1] tensor. The targets can be
                used during training
            target_seq_length: The sequence lengths of the target utterances,
                this is a [batch_size] dimensional vector
            is_training: whether or not the network is in training mode
            reuse: whether or not the variables in the network should be reused
            scope: the name scope

        Returns:
            A quadruple containing:
                - output logits
                - the output logits sequence lengths as a vector
                - a saver object
                - a dictionary of control operations (may be empty)
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

            #the blstm layer
            blstm = BLSTMLayer(self.num_units)

            #the linear output layer
            outlayer = FFLayer(self.output_dim,
                               TfActivation(None, lambda x: x), 0)

            #do the forward computation

            #add gaussian noise to the inputs
            if is_training:
                logits = inputs + tf.random_normal(inputs.get_shape(),
                                                   stddev=0.6)
            else:
                logits = inputs

            for layer in range(self.num_layers):
                logits = blstm(logits, input_seq_length, is_training, reuse,
                               'layer' + str(layer))

                logits = self.activation(logits, is_training, reuse)

            logits = seq_convertors.seq2nonseq(logits, input_seq_length)

            logits = outlayer(logits, is_training, reuse, 'outlayer')

            logits = seq_convertors.nonseq2seq(logits, input_seq_length,
                                               int(inputs.get_shape()[1]))

            #create a saver
            saver = tf.train.Saver()

        return logits, input_seq_length, saver, None
Example #8
    def decode_data(self, writer):

        self.retrieved_data()

        ##########################
        ### GRAPH DEFINITION
        ##########################

        g = tf.Graph()
        with g.as_default():

            decode_inputs = tf.placeholder(
                tf.float32,
                shape=[self.max_length, self.input_dim],
                name='inputs')

            decode_seq_length = tf.placeholder(tf.int32,
                                               shape=[1],
                                               name='seq_length')

            split_inputs = tf.unstack(tf.expand_dims(decode_inputs, 1),
                                      name="decode_split_inputs_op")

            nonseq_inputs = seq_convertors.seq2nonseq(split_inputs,
                                                      decode_seq_length)

            inputs_img = tf.reshape(
                nonseq_inputs, tf.stack([tf.shape(nonseq_inputs)[0], 7, 1,
                                         13]))
            inputs_img = tf.transpose(inputs_img, [0, 1, 3, 2])

            print 'Input Img: '
            print inputs_img.get_shape().as_list()

            hidden_1 = self.convolution(inputs_img, self.conv1_weights,
                                        self.conv1_biases)

            pool = tf.nn.max_pool(hidden_1,
                                  ksize=[1, 3, 1, 1],
                                  strides=[1, 1, 1, 1],
                                  padding='VALID')

            print 'pool_l1: '
            print pool.get_shape().as_list()

            hidden_2 = self.convolution(pool, self.conv2_weights,
                                        self.conv2_biases)

            shape = hidden_2.get_shape().as_list()
            conv_outputs = tf.reshape(
                hidden_2,
                tf.stack(
                    [tf.shape(hidden_2)[0], shape[1] * shape[2] * shape[3]]))

            print 'Outputs: '
            print conv_outputs.get_shape().as_list()

            # Multilayer perceptron

            #-----------Start-----------

            layer_1 = tf.add(tf.matmul(conv_outputs, self.weights['h1']),
                             self.biases['b1'])

            layer_out = tf.nn.tanh(layer_1)

            for i in range(2, self.hid_layer_num + 1):

                layer = tf.add(
                    tf.matmul(layer_out, self.weights['h' + str(i)]),
                    self.biases['b' + str(i)])

                layer_out = tf.nn.tanh(layer)

            logits = tf.add(tf.matmul(layer_out, self.weights['out']),
                            self.biases['out'])

            outputs = tf.nn.softmax(logits, name="final_operation")

            #--------- End--------------------------

        ##########################
        ###      EVALUATION
        ##########################

        config = tf.ConfigProto()
        #config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.9

        with tf.Session(graph=g, config=config) as sess:

            #with tf.Session(graph=g) as sess:

            with g.as_default():
                sess.run(tf.global_variables_initializer())

            for i in range(self.total_uttarences):

                utt_id = self.utt_id_list[i]

                utt_mat = self.utt_dict[utt_id]

                input_seq_length = [utt_mat.shape[0]]

                #print "This is the input length: "+str(input_seq_length)

                #pad the inputs
                utt_mat = np.append(
                    utt_mat,
                    np.zeros(
                        [self.max_length - utt_mat.shape[0],
                         utt_mat.shape[1]]), 0)

                outputs_value = sess.run(outputs,
                                         feed_dict={
                                             decode_inputs: utt_mat,
                                             decode_seq_length:
                                             input_seq_length
                                         })

                print str(i + 1) + " " + str(
                    self.total_uttarences) + " " + str(utt_id) + " " + str(
                        outputs_value.shape)

                #get state likelihoods by dividing by the prior
                output = outputs_value / self.prior

                #floor the values to avoid problems with log
                output = np.where(output == 0, np.finfo(float).eps, output)

                # print (output.shape)
                # print (type(output))

                #write the pseudo-likelihoods in kaldi feature format
                writer.write_next_utt(utt_id, np.log(output))

        #close the writer
        writer.close()
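Before the convolution above, every spliced input frame is reshaped into a small 2-D patch and transposed so that the convolution sees a [frames, 7, 13, 1] image. The 7 and 13 are taken from the code; reading them as seven context frames of thirteen cepstral coefficients is an assumption. The NumPy sketch below only illustrates the shape manipulation:

import numpy as np

num_frames, input_dim = 4, 7 * 13
nonseq_inputs = np.random.randn(num_frames, input_dim).astype(np.float32)

#reshape each 91-dimensional frame into a 7 x 1 x 13 block, then move the
#feature axis inward so the result is a [frames, 7, 13, 1] image
inputs_img = nonseq_inputs.reshape(num_frames, 7, 1, 13)
inputs_img = inputs_img.transpose(0, 1, 3, 2)
print(inputs_img.shape)  # (4, 7, 13, 1)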
Example #9
    def __call__(self,
                 inputs,
                 seq_length,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the LSTM variables and operations to the graph

        Args:
            inputs: the inputs to the neural network, this is a list containing
                a [batch_size, input_dim] tensor for each time step
            seq_length: The sequence lengths of the input utterances, if None
                the maximal sequence length will be taken
            is_training: whether or not the network is in training mode
            reuse: whether or not the variables in the network should be reused
            scope: the name scope

        Returns:
                A quadruple containing:
                - output logits
                - the output logits sequence lengths as a vector
                - a saver object
                - a dictionary of control operations:
                    -add: add a layer to the network
                    -init: initialise the final layer
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

            weights = {
                'out':
                tf.get_variable(
                    'weights_out', [self.num_units, self.output_dim],
                    initializer=tf.contrib.layers.xavier_initializer())
            }

            biases = {
                'out':
                tf.get_variable('biases_out', [self.output_dim],
                                initializer=tf.constant_initializer(0))
            }

            #convert the sequential data to non sequential data
            nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

            input_dim = nonseq_inputs.shape[1]
            nonseq_inputs = tf.reshape(nonseq_inputs, [-1, 11, 40])

            n_steps = 11
            nonseq_inputs = tf.transpose(nonseq_inputs, [1, 0, 2])

            keep_prob = 1
            # define the lstm cell
            # use the dropout in training mode
            if is_training and keep_prob < 1:
                lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                    self.num_units,
                    forget_bias=0.0,
                    input_size=None,
                    activation=tf.nn.relu,
                    layer_norm=False,
                    norm_gain=1.0,
                    norm_shift=0.0,
                    dropout_keep_prob=keep_prob,
                    dropout_prob_seed=None)
            else:
                lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                    self.num_units,
                    forget_bias=0.0,
                    input_size=None,
                    activation=tf.nn.relu,
                    layer_norm=False,
                    norm_gain=1.0,
                    norm_shift=0.0,
                    dropout_keep_prob=1,
                    dropout_prob_seed=None)

            # stack the lstm to form multi-layers
            cell = tf.contrib.rnn.MultiRNNCell([lstm_cell] * self.num_layers,
                                               state_is_tuple=True)

            # print(int(nonseq_inputs.shape[0]))
            # self._initial_state = cell.zero_state(int(nonseq_inputs.shape[0]), tf.float32)

            # apply the dropout for the inputs to the first hidden layer
            if is_training and keep_prob < 1:
                nonseq_inputs = tf.nn.dropout(nonseq_inputs, keep_prob)

            final_nonseq_inputs = tf.unstack(nonseq_inputs,
                                             num=n_steps,
                                             axis=0)

            # Get lstm cell output initial_state=self._initial_state,
            outputs, states = tf.contrib.rnn.static_rnn(cell,
                                                        final_nonseq_inputs,
                                                        dtype=tf.float32)
            outputs = outputs[-1]

            # Linear activation, using rnn inner loop last output
            logits = tf.matmul(outputs, weights['out']) + biases['out']

            if self.layerwise_init:
                #variable that determines how many layers are initialised
                #in the neural net
                initialisedlayers = tf.get_variable(
                    'initialisedlayers', [],
                    initializer=tf.constant_initializer(0),
                    trainable=False,
                    dtype=tf.int32)

                #operation to increment the number of layers
                add_layer_op = initialisedlayers.assign(initialisedlayers +
                                                        1).op

                #operation to initialise the final layer
                init_last_layer_op = tf.initialize_variables(
                    tf.get_collection(tf.GraphKeys.VARIABLES,
                                      scope=(tf.get_variable_scope().name +
                                             '/layer' + str(self.num_layers))))

                control_ops = {'add': add_layer_op, 'init': init_last_layer_op}
            else:
                control_ops = None

            #convert the logits to sequence logits to match expected output
            seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                                   len(inputs))

            #create a saver
            saver = tf.train.Saver()

        return seq_logits, seq_length, saver, control_ops
Ejemplo n.º 10
0
    def __call__(self,
                 inputs,
                 seq_length,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the DNN variables and operations to the graph

        Args:
            inputs: the inputs to the neural network, this is a list containing
                a [batch_size, input_dim] tensor for each time step
            seq_length: The sequence lengths of the input utterances, if None
                the maximal sequence length will be taken
            is_training: whether or not the network is in training mode
            reuse: whether or not the variables in the network should be reused
            scope: the name scope

        Returns:
            A quadruple containing:
                - output logits
                - the output logits sequence lengths as a vector
                - a saver object
                - a dictionary of control operations:
                    -add: add a layer to the network
                    -init: initialise the final layer
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
            #input layer
            layer = FFLayer(self.num_units, self.activation)
            #output layer
            outlayer = FFLayer(self.output_dim,
                               TfActivation(None, lambda x: x), 0)
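            #the output layer uses a linear (identity) activation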

            #convert the sequential data to non sequential data
            ## if you want to use the pure dnn, uncomment the line below
            #nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

            activations = [None] * self.num_layers

            # Define the first hidden layers
            # # the conv layer
            #cnn_layer = RestNet()
            #cnn_layer = CnnVd6()
            if self.cnn_type == 1:
                print('------The Cnn Config------')
                #convert the sequential data to non sequential data
                nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

                cnn_layer = CnnLayer(self.cnn_conf)
                activations[0] = cnn_layer(nonseq_inputs, is_training, reuse,
                                           'layer0')
            else:
                print("Not using CNN")
            # # the lstm layer, type 1
            if self.lstm_type == 1:
                print('------The LSTM Config------')
                #convert the sequential data to non sequential data
                # the inputs are a list with one element per time step (e.g. 777),
                # each element a 2-D tensor of shape [batch_size (e.g. 64) x feature_dim]
                # nonseq_inputs is a 2-D tensor of shape [batch_size x feature_dim],
                # where this new batch_size equals the original batch_size x time
                nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)
                print('Type 1: the LSTM data processing is similar to the DNN, '
                      'using stacked frames; no output state is reused')

                lstm_layer = LSTMLayer(self.lstm_conf)
                activations[0] = lstm_layer(nonseq_inputs, is_training, reuse,
                                            'layer0')
            ## the lstm layer, type 2
            elif self.lstm_type == 2:
                print('------The LSTM Config------')
                print('Type 2: the LSTM data is processed fully sequentially')

                # here we directly use the sequential data, i.e. the inputs argument
                lstm_layer = LSTMLayer2(self.lstm_conf2)
                # the dynamic lstm's output has the format: time x batch_size x feature_dim
                seq_output = lstm_layer(inputs, seq_length, is_training, reuse,
                                        'layer0')

                # to connect to the dnn, the sequential output is converted to
                # non-sequential data so it can be used directly by the dnn layers
                activations[0] = seq_convertors.seq2nonseq(
                    seq_output, seq_length)
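                # activations[0] is then frame-level data, presumably of shape
                # [total_frames x output_dim] once the padding frames are removed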

            ## the lstm layer, type 3
            elif self.lstm_type == 3:
                print('------The LSTM Config------')
                print('Type 3: the LSTM data is processed in sub-sequences')

                # here we directly use the sequential data, i.e. the inputs argument
                lstm_layer = LSTMLayer3(self.lstm_conf3, self.max_input_length)
                # the dynamic lstm's output has the format: time x batch_size x feature_dim
                seq_output = lstm_layer(inputs, seq_length, is_training, reuse,
                                        'layer0')
                # to connect to the dnn, the sequential output is converted to
                # non-sequential data so it can be used directly by the dnn layers

                # Note:
                # the first index of seq_output corresponds to the sequence length,
                # i.e. its shape is [seq_length x batch_size x output_dim]
                activations[0] = seq_convertors.seq2nonseq(
                    seq_output, seq_length)
            else:
                print("Not using LSTM")

            # define the FL hidden layers
            print('------The DNN Config------')
            print("use %d FL hidden layer" % (self.FL_num_layers))
            for l in range(1, self.num_layers):
                print("the " + str(l) + " layer's input is: " +
                      str(activations[l - 1].shape))
                activations[l] = layer(activations[l - 1], is_training, reuse,
                                       'layer' + str(l))

            if self.layerwise_init:
                #variable that determines how many layers are initialised
                #in the neural net
                initialisedlayers = tf.get_variable(
                    'initialisedlayers', [],
                    initializer=tf.constant_initializer(0),
                    trainable=False,
                    dtype=tf.int32)

                #operation to increment the number of layers
                add_layer_op = initialisedlayers.assign(initialisedlayers +
                                                        1).op

                #compute the logits by selecting the activations at the layer
                #that has last been added to the network, this is used for layer
                #by layer initialisation
                logits = tf.case([(tf.equal(initialisedlayers, tf.constant(l)),
                                   Callable(activations[l]))
                                  for l in range(len(activations))],
                                 default=Callable(activations[-1]),
                                 exclusive=True,
                                 name='layerSelector')

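                #tf.case can lose the static shape information, so it is
                #reasserted on the logits here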
                logits.set_shape([None, self.num_units])
            else:
                logits = activations[-1]

            logits = outlayer(logits, is_training, reuse,
                              'layer' + str(self.num_layers))

            if self.layerwise_init:
                #operation to initialise the final layer
                init_last_layer_op = tf.initialize_variables(
                    tf.get_collection(
                        tf.GraphKeys.VARIABLES,
                        scope=(tf.get_variable_scope().name + '/layer' +
                               str(self.FL_num_layers))))
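                #note: this scope is expected to match the name of the output
                #layer defined above ('layer' + str(self.num_layers)), which
                #assumes FL_num_layers and num_layers denote the same count here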

                control_ops = {'add': add_layer_op, 'init': init_last_layer_op}
            else:
                control_ops = None

            #convert the logits to sequence logits to match expected output
            seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                                   len(inputs))

            #create a saver
            saver = tf.train.Saver()

        return seq_logits, seq_length, saver, control_ops
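
Both snippets above rely on seq_convertors.seq2nonseq and seq_convertors.nonseq2seq to move between padded sequential data and a flat, frame-level representation that the fully connected layers can consume. Those helpers are not shown in this listing, so the following is only a minimal sketch of the assumed behaviour, written for a [batch_size x max_length x dim] input tensor and a [batch_size] length vector; the real module may use different signatures and conventions:

import tensorflow as tf


def seq2nonseq_sketch(sequences, seq_length):
    '''sketch: drop the padding frames of a padded [batch_size x max_length x dim]
    tensor and concatenate the remaining frames into a [total_frames x dim] tensor'''

    #boolean mask marking the valid (non-padding) frames of every utterance
    mask = tf.sequence_mask(seq_length, maxlen=tf.shape(sequences)[1])

    #keep only the valid frames, concatenated over the whole batch
    return tf.boolean_mask(sequences, mask)


def nonseq2seq_sketch(nonseq, seq_length, max_length):
    '''sketch: the inverse operation, scattering [total_frames x dim] data back
    into a zero-padded [batch_size x max_length x dim] tensor'''

    mask = tf.sequence_mask(seq_length, maxlen=max_length)

    #(batch, time) positions of the valid frames in the padded tensor
    indices = tf.cast(tf.where(mask), tf.int32)

    #target shape: [batch_size, max_length, dim]
    shape = tf.concat([tf.shape(mask), tf.shape(nonseq)[1:]], axis=0)

    return tf.scatter_nd(indices, nonseq, shape)

This round trip is what lets frame-level layers (the output projection and the FF hidden layers) be mixed with sequence models in the examples above.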