def __call__(self, inputs, seq_length, is_training=False, reuse=False,
             scope=None):
    '''
    Add the CNN variables and operations to the graph

    Args:
        inputs: the inputs to the neural network, this is a
            [batch_size x max_input_length x feature_dim] tensor
        seq_length: The sequence lengths of the input utterances, this is
            a [batch_size] dimensional vector
        is_training: whether or not the network is in training mode
        reuse: whether or not the variables in the network should be reused
        scope: the name scope

    Returns:
        A quadruple containing:
            - output logits
            - the output logits sequence lengths as a vector
            - a saver object
            - a dictionary of control operations (None for this model)
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        #input layer
        conv = Conv2dLayer(self.num_units, 3, 1)

        #output layer
        outlayer = FFLayer(self.output_dim,
                           TfActivation(None, lambda x: x), 0)

        #splice the inputs with shifted copies of themselves to add
        #temporal context, stacked along a new channel axis
        time_steps = [inputs]
        num_time_steps = 11

        for i in range(num_time_steps):
            forward = tf.pad(inputs[:, i + 1:, :],
                             [[0, 0], [0, i + 1], [0, 0]])
            backward = tf.pad(inputs[:, :-i - 1, :],
                              [[0, 0], [i + 1, 0], [0, 0]])
            time_steps += [forward, backward]

        logits = tf.stack(time_steps, axis=3)

        #apply the convolutional layers
        #logits = tf.expand_dims(inputs, 3)
        for l in range(1, self.num_layers):
            logits = conv(logits, seq_length, is_training,
                          'convlayer' + str(l))
            logits = tf.nn.relu(logits)

        #stack all the output channels for the final layer
        logits = tf.reshape(logits,
                            logits.get_shape().as_list()[0:2] + [-1])

        #convert the logits to nonsequence logits for the output layer
        logits = seq_convertors.seq2nonseq(logits, seq_length)

        logits = outlayer(logits, seq_length, is_training, 'outlayer')

        #convert the logits to sequence logits to match the expected output
        seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                               int(inputs.get_shape()[1]))

        #create a saver
        saver = tf.train.Saver()

        control_ops = None

    return seq_logits, seq_length, saver, control_ops
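# A minimal numpy sketch (not part of the model above) of the shift-and-pad
# splicing used in the CNN: every context offset i yields a forward copy
# (frames shifted left, zero padded at the end) and a backward copy (shifted
# right, zero padded at the front), and all copies are stacked along a new
# channel axis. The function name and shapes are made up for illustration.
import numpy as np

def splice_context(inputs, num_time_steps=11):
    #inputs: [batch_size, max_length, feature_dim]
    time_steps = [inputs]
    for i in range(num_time_steps):
        forward = np.pad(inputs[:, i + 1:, :],
                         [(0, 0), (0, i + 1), (0, 0)], mode='constant')
        backward = np.pad(inputs[:, :-i - 1, :],
                          [(0, 0), (i + 1, 0), (0, 0)], mode='constant')
        time_steps += [forward, backward]
    return np.stack(time_steps, axis=3)

#original + 11 forward + 11 backward copies -> 23 channels
print(splice_context(np.zeros([2, 20, 40])).shape)  # (2, 20, 40, 23)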
def __call__(self, inputs, input_seq_length, targets=None,
             target_seq_length=None, is_training=False, reuse=False,
             scope=None):
    '''
    Add the neural net variables and operations to the graph

    Args:
        inputs: the inputs to the neural network, this is a
            [batch_size x max_input_length x feature_dim] tensor
        input_seq_length: The sequence lengths of the input utterances,
            this is a [batch_size] dimensional vector
        targets: the targets to the neural network, this is a
            [batch_size x max_output_length x 1] tensor. The targets can be
            used during training
        target_seq_length: The sequence lengths of the target utterances,
            this is a [batch_size] dimensional vector
        is_training: whether or not the network is in training mode
        reuse: whether or not the variables in the network should be reused
        scope: the name scope

    Returns:
        A quadruple containing:
            - output logits
            - the output logits sequence lengths as a vector
            - a saver object
            - a dictionary of control operations (may be empty)
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        #create the input layer
        inlayer = Conv1dlayer(self.num_units, self.kernel_size, 1)

        #create the gated convolutional layers
        dconv = GatedDilatedConvolution(self.kernel_size)

        #create the fully connected layer
        act = activation.TfActivation(None, tf.nn.relu)
        fflayer = FFLayer(self.num_units, act)

        #create the output layer
        act = activation.TfActivation(None, lambda x: x)
        outlayer = FFLayer(self.output_dim, act)

        #apply the input layer
        logits = 0
        forward = inlayer(inputs, is_training, reuse, 'inlayer')

        #apply the blocks of dilated convolution layers
        for b in range(self.num_blocks):
            for l in range(self.num_layers):
                forward, highway = dconv(forward, 2**l, is_training, reuse,
                                         'dconv%d-%d' % (b, l))
                logits += highway

        #go to nonsequential data
        logits = seq_convertors.seq2nonseq(logits, input_seq_length)

        #apply the relu
        logits = tf.nn.relu(logits)

        #apply the fully connected layer
        logits = fflayer(logits, is_training, reuse, scope='FFlayer')

        #apply the output layer
        logits = outlayer(logits, is_training, reuse, scope='outlayer')

        #go back to sequential data
        logits = seq_convertors.nonseq2seq(logits, input_seq_length,
                                           int(inputs.get_shape()[1]))

        #create a saver
        saver = tf.train.Saver()

        return logits, input_seq_length, saver, None
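# Hedged helper, not part of the model above: an estimate of the receptive
# field of the stack of gated convolutions, assuming each
# GatedDilatedConvolution layer acts as a dilated convolution with the given
# kernel_size and dilation rate 2**l on top of the kernel_size input layer.
def dilated_stack_receptive_field(num_blocks, num_layers, kernel_size):
    field = kernel_size                      # the input layer
    for _ in range(num_blocks):
        for l in range(num_layers):
            field += (kernel_size - 1) * 2**l
    return field

#e.g. 2 blocks of 5 layers with kernel size 3 span roughly 127 frames
print(dilated_stack_receptive_field(2, 5, 3))  # 127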
def decode_data(self, writer):

    self.retrieved_data()

    ##########################
    ### GRAPH DEFINITION
    ##########################
    g = tf.Graph()
    with g.as_default():

        decode_inputs = tf.placeholder(
            tf.float32,
            shape=[self.max_length, self.input_dim],
            name='decode_inputs')

        decode_seq_length = tf.placeholder(
            tf.int32, shape=[1], name='decode_seq_length')

        split_inputs = tf.unstack(tf.expand_dims(decode_inputs, 1),
                                  name="decode_split_inputs_op")

        nonseq_inputs = seq_convertors.seq2nonseq(split_inputs,
                                                  decode_seq_length)

        # Multilayer perceptron
        layer_1 = tf.add(tf.matmul(nonseq_inputs, self.weights_h1),
                         self.bias_b1)
        layer_1 = tf.nn.tanh(layer_1)
        layer_2 = tf.add(tf.matmul(layer_1, self.weights_h2), self.bias_b2)
        layer_2 = tf.nn.tanh(layer_2)
        logits = tf.add(tf.matmul(layer_2, self.weights_out), self.bias_out,
                        name="logits_op")

        seq_logits = seq_convertors.nonseq2seq(logits, decode_seq_length,
                                               len(split_inputs))

        decode_logits = seq_convertors.seq2nonseq(seq_logits,
                                                  decode_seq_length)

        outputs = tf.nn.softmax(decode_logits, name="final_operation")

    ##########################
    ### EVALUATION
    ##########################
    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9

    with tf.Session(graph=g, config=config) as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(self.total_uttarences):

            utt_id = self.utt_id_list[i]
            utt_mat = self.utt_dict[utt_id]
            input_seq_length = [utt_mat.shape[0]]

            #pad the inputs
            utt_mat = np.append(
                utt_mat,
                np.zeros([self.max_length - utt_mat.shape[0],
                          utt_mat.shape[1]]), 0)

            outputs_value = sess.run(
                'final_operation:0',
                feed_dict={'decode_inputs:0': utt_mat,
                           'decode_seq_length:0': input_seq_length})

            #get state likelihoods by dividing by the prior
            output = outputs_value / self.prior

            #floor the values to avoid problems with log
            output = np.where(output == 0, np.finfo(float).eps, output)

            #write the pseudo-likelihoods in kaldi feature format
            writer.write_next_utt(utt_id, np.log(output))

    #close the writer
    writer.close()
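# A small numpy sketch (hypothetical helper, not part of the decoder above)
# of the posterior-to-pseudo-likelihood conversion it performs: divide the
# softmax outputs by the state prior, floor zeros to machine epsilon so the
# log stays finite, and take the log before writing in kaldi format.
import numpy as np

def pseudo_log_likelihood(posteriors, prior):
    #posteriors: [num_frames, num_states], prior: [num_states]
    likelihood = posteriors / prior
    likelihood = np.where(likelihood == 0, np.finfo(float).eps, likelihood)
    return np.log(likelihood)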
def __call__(self, inputs, seq_length, is_training=False, reuse=False,
             scope=None):
    '''
    Add the DNN variables and operations to the graph

    Args:
        inputs: the inputs to the neural network, this is a list containing
            a [batch_size, input_dim] tensor for each time step
        seq_length: The sequence lengths of the input utterances, if None
            the maximal sequence length will be taken
        is_training: whether or not the network is in training mode
        reuse: whether or not the variables in the network should be reused
        scope: the name scope

    Returns:
        A quadruple containing:
            - output logits
            - the output logits sequence lengths as a vector
            - a saver object
            - a dictionary of control operations:
                - add: add a layer to the network
                - init: initialise the final layer
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        #input layer
        layer = FFLayer(self.num_units, self.activation)

        #output layer
        outlayer = FFLayer(self.output_dim,
                           TfActivation(None, lambda x: x), 0)

        #do the forward computation

        #convert the sequential data to non sequential data
        nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

        activations = [None] * self.num_layers
        activations[0] = layer(nonseq_inputs, is_training, reuse, 'layer0')
        for l in range(1, self.num_layers):
            activations[l] = layer(activations[l - 1], is_training, reuse,
                                   'layer' + str(l))

        if self.layerwise_init:

            #variable that determines how many layers are initialised
            #in the neural net
            initialisedlayers = tf.get_variable(
                'initialisedlayers', [],
                initializer=tf.constant_initializer(0),
                trainable=False,
                dtype=tf.int32)

            #operation to increment the number of layers
            add_layer_op = initialisedlayers.assign(
                initialisedlayers + 1).op

            #compute the logits by selecting the activations at the layer
            #that has last been added to the network, this is used for
            #layer by layer initialisation
            logits = tf.case(
                [(tf.equal(initialisedlayers, tf.constant(l)),
                  Callable(activations[l]))
                 for l in range(len(activations))],
                default=Callable(activations[-1]),
                exclusive=True, name='layerSelector')

            logits.set_shape([None, self.num_units])
        else:
            logits = activations[-1]

        logits = outlayer(logits, is_training, reuse,
                          'layer' + str(self.num_layers))

        if self.layerwise_init:
            #operation to initialise the final layer
            init_last_layer_op = tf.initialize_variables(
                tf.get_collection(
                    tf.GraphKeys.VARIABLES,
                    scope=(tf.get_variable_scope().name + '/layer'
                           + str(self.num_layers))))

            control_ops = {'add': add_layer_op, 'init': init_last_layer_op}
        else:
            control_ops = None

        #convert the logits to sequence logits to match the expected output
        seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                               len(inputs))

        #create a saver
        saver = tf.train.Saver()

    return seq_logits, seq_length, saver, control_ops
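# Hedged, standalone TF1 illustration of the tf.case layer-selector pattern
# used in the layerwise initialisation above. It is not the toolkit's code:
# plain lambdas stand in for the Callable helper, and the variable and
# tensor names are made up. A non-trainable counter selects which activation
# is routed onward, and running the increment op "adds" the next layer.
import tensorflow as tf

counter = tf.get_variable('initialised_layers_demo', [], dtype=tf.int32,
                          initializer=tf.constant_initializer(0),
                          trainable=False)
add_layer_demo_op = counter.assign(counter + 1).op

demo_activations = [tf.constant([1.0]), tf.constant([2.0]),
                    tf.constant([3.0])]
selected = tf.case(
    [(tf.equal(counter, l), (lambda a: lambda: a)(demo_activations[l]))
     for l in range(len(demo_activations))],
    default=lambda: demo_activations[-1],
    exclusive=True, name='layerSelectorDemo')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(selected))      # [1.] -> only layer 0 is active
    sess.run(add_layer_demo_op)
    print(sess.run(selected))      # [2.] -> layer 1 has been added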
def __call__(self, inputs, input_seq_length, targets=None,
             target_seq_length=None, is_training=False, reuse=False,
             scope=None):
    '''
    Add the neural net variables and operations to the graph

    Args:
        inputs: the inputs to the neural network, this is a
            [batch_size x max_input_length x feature_dim] tensor
        input_seq_length: The sequence lengths of the input utterances,
            this is a [batch_size] dimensional vector
        targets: the targets to the neural network, this is a
            [batch_size x max_output_length x 1] tensor. The targets can be
            used during training
        target_seq_length: The sequence lengths of the target utterances,
            this is a [batch_size] dimensional vector
        is_training: whether or not the network is in training mode
        reuse: whether or not the variables in the network should be reused
        scope: the name scope

    Returns:
        A quadruple containing:
            - output logits
            - the output logits sequence lengths as a vector
            - a saver object
            - a dictionary of control operations (may be empty)
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        #the blstm layer
        blstm = BLSTMLayer(self.num_units)

        #the linear output layer
        outlayer = FFLayer(self.output_dim,
                           TfActivation(None, lambda x: x), 0)

        #do the forward computation

        #add gaussian noise to the inputs during training as regularisation
        if is_training:
            logits = inputs + tf.random_normal(inputs.get_shape(),
                                               stddev=0.6)
        else:
            logits = inputs

        for layer in range(self.num_layers):
            logits = blstm(logits, input_seq_length, is_training, reuse,
                           'layer' + str(layer))

        logits = self.activation(logits, is_training, reuse)

        logits = seq_convertors.seq2nonseq(logits, input_seq_length)

        logits = outlayer(logits, is_training, reuse, 'outlayer')

        logits = seq_convertors.nonseq2seq(logits, input_seq_length,
                                           int(inputs.get_shape()[1]))

        #create a saver
        saver = tf.train.Saver()

        return logits, input_seq_length, saver, None
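# Hedged sketch of the train-time input noise used above: Gaussian noise is
# only added when is_training is True, so validation and decoding see the
# clean features. The helper name is made up; tf.shape is used instead of
# get_shape so the batch size may be unknown.
import tensorflow as tf

def maybe_add_input_noise(inputs, is_training, stddev=0.6):
    if is_training:
        return inputs + tf.random_normal(tf.shape(inputs), stddev=stddev)
    return inputs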
def __call__(self, inputs, seq_length, is_training=False, reuse=False,
             scope=None):
    '''
    Add the LSTM variables and operations to the graph

    Args:
        inputs: the inputs to the neural network, this is a list containing
            a [batch_size, input_dim] tensor for each time step
        seq_length: The sequence lengths of the input utterances, if None
            the maximal sequence length will be taken
        is_training: whether or not the network is in training mode
        reuse: whether or not the variables in the network should be reused
        scope: the name scope

    Returns:
        A quadruple containing:
            - output logits
            - the output logits sequence lengths as a vector
            - a saver object
            - a dictionary of control operations:
                - add: add a layer to the network
                - init: initialise the final layer
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        weights = {
            'out': tf.get_variable(
                'weights_out', [self.num_units, self.output_dim],
                initializer=tf.contrib.layers.xavier_initializer())
        }

        biases = {
            'out': tf.get_variable(
                'biases_out', [self.output_dim],
                initializer=tf.constant_initializer(0))
        }

        #convert the sequential data to non sequential data
        nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

        #reshape the spliced frames into windows of 11 time steps of
        #40-dimensional features and go to time-major order
        input_dim = nonseq_inputs.shape[1]
        nonseq_inputs = tf.reshape(nonseq_inputs, [-1, 11, 40])
        n_steps = 11
        nonseq_inputs = tf.transpose(nonseq_inputs, [1, 0, 2])

        keep_prob = 1

        #define the lstm cell, with dropout in training mode
        if is_training and keep_prob < 1:
            lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                self.num_units, forget_bias=0.0, input_size=None,
                activation=tf.nn.relu, layer_norm=False, norm_gain=1.0,
                norm_shift=0.0, dropout_keep_prob=keep_prob,
                dropout_prob_seed=None)
        else:
            lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                self.num_units, forget_bias=0.0, input_size=None,
                activation=tf.nn.relu, layer_norm=False, norm_gain=1.0,
                norm_shift=0.0, dropout_keep_prob=1,
                dropout_prob_seed=None)

        #stack the lstm cells to form multiple layers
        cell = tf.contrib.rnn.MultiRNNCell([lstm_cell] * self.num_layers,
                                           state_is_tuple=True)

        #self._initial_state = cell.zero_state(
        #    int(nonseq_inputs.shape[0]), tf.float32)

        #apply dropout to the inputs of the first hidden layer
        if is_training and keep_prob < 1:
            nonseq_inputs = tf.nn.dropout(nonseq_inputs, keep_prob)

        final_nonseq_inputs = tf.unstack(nonseq_inputs, num=n_steps, axis=0)

        #get the lstm cell output (initial_state=self._initial_state could
        #be passed here if the commented-out zero state above is used)
        outputs, states = tf.contrib.rnn.static_rnn(cell,
                                                    final_nonseq_inputs,
                                                    dtype=tf.float32)
        outputs = outputs[-1]

        #linear activation, using the last output of the rnn inner loop
        logits = tf.matmul(outputs, weights['out']) + biases['out']

        #the layerwise initialisation block below is kept for reference;
        #note that add_layer_op is only defined inside it, so the 'add'
        #control op further down requires re-enabling it
        ## if self.layerwise_init:
        ##     #variable that determines how many layers are initialised
        ##     #in the neural net
        ##     initialisedlayers = tf.get_variable(
        ##         'initialisedlayers', [],
        ##         initializer=tf.constant_initializer(0),
        ##         trainable=False,
        ##         dtype=tf.int32)
        ##
        ##     #operation to increment the number of layers
        ##     add_layer_op = initialisedlayers.assign(
        ##         initialisedlayers + 1).op
        ##
        ##     #compute the logits by selecting the activations at the layer
        ##     #that has last been added to the network, this is used for
        ##     #layer by layer initialisation
        ##     logits = tf.case(
        ##         [(tf.equal(initialisedlayers, tf.constant(l)),
        ##           Callable(activations[l]))
        ##          for l in range(len(activations))],
        ##         default=Callable(activations[-1]),
        ##         exclusive=True, name='layerSelector')
        ##
        ##     logits.set_shape([None, self.num_units])

        if self.layerwise_init:
            #operation to initialise the final layer
            init_last_layer_op = tf.initialize_variables(
                tf.get_collection(
                    tf.GraphKeys.VARIABLES,
                    scope=(tf.get_variable_scope().name + '/layer'
                           + str(self.num_layers))))

            control_ops = {'add': add_layer_op, 'init': init_last_layer_op}
        else:
            control_ops = None

        #convert the logits to sequence logits to match the expected output
        seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                               len(inputs))

        #create a saver
        saver = tf.train.Saver()

    return seq_logits, seq_length, saver, control_ops
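# Hedged, standalone illustration of the data layout fed to static_rnn above,
# assuming the hard-coded splicing of 11 time steps of 40-dimensional
# features; the frame count (64) is made up. The non-sequential frames are
# reshaped into windows, transposed to time-major order and unstacked into a
# per-step list, and only the last output of the unrolled LSTM is used.
import tensorflow as tf

nonseq_frames = tf.zeros([64, 11 * 40])               # [frames, spliced_dim]
windows = tf.reshape(nonseq_frames, [-1, 11, 40])     # [frames, steps, feat]
time_major = tf.transpose(windows, [1, 0, 2])         # [steps, frames, feat]
step_list = tf.unstack(time_major, num=11, axis=0)    # 11 x [frames, feat]
print(len(step_list), step_list[0].get_shape().as_list())  # 11 [64, 40]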
def __call__(self, inputs, seq_length, is_training=False, reuse=False,
             scope=None):
    '''
    Add the DNN variables and operations to the graph

    Args:
        inputs: the inputs to the neural network, this is a list containing
            a [batch_size, input_dim] tensor for each time step
        seq_length: The sequence lengths of the input utterances, if None
            the maximal sequence length will be taken
        is_training: whether or not the network is in training mode
        reuse: whether or not the variables in the network should be reused
        scope: the name scope

    Returns:
        A quadruple containing:
            - output logits
            - the output logits sequence lengths as a vector
            - a saver object
            - a dictionary of control operations:
                - add: add a layer to the network
                - init: initialise the final layer
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        #input layer
        layer = FFLayer(self.num_units, self.activation)

        #output layer
        outlayer = FFLayer(self.output_dim,
                           TfActivation(None, lambda x: x), 0)

        #convert the sequential data to non sequential data
        #if you want to use the pure dnn, uncomment this line
        #nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

        activations = [None] * self.num_layers

        #define the first hidden layer: a CNN or an LSTM front-end

        #the conv layer
        #cnn_layer = RestNet()
        #cnn_layer = CnnVd6()
        if self.cnn_type == 1:
            print('------The CNN Config------')

            #convert the sequential data to non sequential data
            nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

            cnn_layer = CnnLayer(self.cnn_conf)
            activations[0] = cnn_layer(nonseq_inputs, is_training, reuse,
                                       'layer0')
        else:
            print("Not using CNN")

        #the lstm layer, type 1
        if self.lstm_type == 1:
            print('------The LSTM Config------')

            #convert the sequential data to non sequential data
            #the inputs format is a time list (e.g. 777 steps), each element
            #a 2-D tensor of batch_size (e.g. 64) x feature-dim
            #the nonseq_inputs format is a 2-D tensor of
            #(batch_size x time) x feature-dim
            nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

            print('Type1: the lstm data processing is similar to the dnn, '
                  'stacked frames are used and no output state is reused')

            lstm_layer = LSTMLayer(self.lstm_conf)
            activations[0] = lstm_layer(nonseq_inputs, is_training, reuse,
                                        'layer0')

        #the lstm layer, type 2
        elif self.lstm_type == 2:
            print('------The LSTM Config------')
            print('Type2: the lstm data processing is fully sequential')

            #here we directly use the sequential data, i.e. inputs
            lstm_layer = LSTMLayer2(self.lstm_conf2)

            #the dynamic lstm's output has the format:
            #time x batch_size x feature_dim
            seq_output = lstm_layer(inputs, seq_length, is_training, reuse,
                                    'layer0')

            #to connect the dnn, transform the sequential output to
            #non-sequential data so it can be used directly by the dnn
            activations[0] = seq_convertors.seq2nonseq(seq_output,
                                                       seq_length)

        #the lstm layer, type 3
        elif self.lstm_type == 3:
            print('------The LSTM Config------')
            print('Type3: the lstm data is processed in sub-sequences')

            #here we directly use the sequential data, i.e. inputs
            lstm_layer = LSTMLayer3(self.lstm_conf3, self.max_input_length)

            #the dynamic lstm's output has the format:
            #time x batch_size x feature_dim
            seq_output = lstm_layer(inputs, seq_length, is_training, reuse,
                                    'layer0')

            #to connect the dnn, transform the sequential output to
            #non-sequential data so it can be used directly by the dnn
            #note: the first index of seq_output should correspond to
            #seq_length, i.e. shape [seq_length, batch_size, output_dim]
            activations[0] = seq_convertors.seq2nonseq(seq_output,
                                                       seq_length)
        else:
            print("Not using LSTM")

        #define the fully connected (FL) hidden layers
        print('------The DNN Config------')
        print("use %d FL hidden layers" % (self.FL_num_layers))

        for l in range(1, self.num_layers):
            print("the " + str(l) + " layer's input is: "
                  + str(activations[l - 1].shape))
            activations[l] = layer(activations[l - 1], is_training, reuse,
                                   'layer' + str(l))

        if self.layerwise_init:

            #variable that determines how many layers are initialised
            #in the neural net
            initialisedlayers = tf.get_variable(
                'initialisedlayers', [],
                initializer=tf.constant_initializer(0),
                trainable=False,
                dtype=tf.int32)

            #operation to increment the number of layers
            add_layer_op = initialisedlayers.assign(
                initialisedlayers + 1).op

            #compute the logits by selecting the activations at the layer
            #that has last been added to the network, this is used for
            #layer by layer initialisation
            logits = tf.case(
                [(tf.equal(initialisedlayers, tf.constant(l)),
                  Callable(activations[l]))
                 for l in range(len(activations))],
                default=Callable(activations[-1]),
                exclusive=True, name='layerSelector')

            logits.set_shape([None, self.num_units])
        else:
            logits = activations[-1]

        logits = outlayer(logits, is_training, reuse,
                          'layer' + str(self.num_layers))

        if self.layerwise_init:
            #operation to initialise the final layer
            init_last_layer_op = tf.initialize_variables(
                tf.get_collection(
                    tf.GraphKeys.VARIABLES,
                    scope=(tf.get_variable_scope().name + '/layer'
                           + str(self.FL_num_layers))))

            control_ops = {'add': add_layer_op, 'init': init_last_layer_op}
        else:
            control_ops = None

        #convert the logits to sequence logits to match the expected output
        seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                               len(inputs))

        #create a saver
        saver = tf.train.Saver()

    return seq_logits, seq_length, saver, control_ops
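# Hedged numpy sketch of the seq <-> non-seq round trip the hybrid model
# relies on; this is an illustration of the idea, not the toolkit's
# seq_convertors implementation. Sequential data is a list of
# [batch_size, dim] time steps; non-sequential data concatenates only the
# valid frames of every utterance so frame-wise layers never see padding.
import numpy as np

def seq2nonseq_sketch(sequences, seq_lengths):
    #sequences: list of max_length arrays of shape [batch_size, dim]
    stacked = np.stack(sequences, axis=0)              # [T, batch, dim]
    return np.concatenate(
        [stacked[:length, b, :] for b, length in enumerate(seq_lengths)],
        axis=0)                                        # [sum(lengths), dim]

def nonseq2seq_sketch(frames, seq_lengths, max_length):
    #frames: [sum(seq_lengths), dim] -> list of max_length [batch, dim] steps
    dim = frames.shape[1]
    out = np.zeros([max_length, len(seq_lengths), dim])
    offset = 0
    for b, length in enumerate(seq_lengths):
        out[:length, b, :] = frames[offset:offset + length]
        offset += length
    return [out[t] for t in range(max_length)]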