# NOTE: these snippets assume the codebase's usual module-level imports:
# os, gzip, numpy as np, tensorflow as tf, and the helper modules
# (seq_convertors, the layer classes and the activations).

def __call__(self, inputs, seq_length, is_training=False, reuse=False,
             scope=None):
    '''
    Add the CNN variables and operations to the graph
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        #input layer
        conv = Conv2dLayer(self.num_units, 3, 1)

        #output layer
        outlayer = FFLayer(self.output_dim,
                           TfActivation(None, lambda x: x), 0)

        #build a +/-11 frame context window by stacking time-shifted
        #copies of the input along a new channel axis
        time_steps = [inputs]
        num_time_steps = 11
        for i in range(num_time_steps):
            forward = tf.pad(inputs[:, i + 1:, :],
                             [[0, 0], [0, i + 1], [0, 0]])
            backward = tf.pad(inputs[:, :-i - 1, :],
                              [[0, 0], [i + 1, 0], [0, 0]])
            time_steps += [forward, backward]

        logits = tf.stack(time_steps, axis=3)

        #apply the convolutional layers
        #logits = tf.expand_dims(inputs, 3)
        for l in range(1, self.num_layers):
            logits = conv(logits, seq_length, is_training,
                          'convlayer' + str(l))
            logits = tf.nn.relu(logits)

        #stack all the output channels for the final layer; the static
        #batch and time dimensions are assumed to be known here
        logits = tf.reshape(logits,
                            logits.get_shape().as_list()[0:2] + [-1])

        #convert the logits to nonsequence logits for the output layer
        logits = seq_convertors.seq2nonseq(logits, seq_length)

        logits = outlayer(logits, seq_length, is_training, 'outlayer')

        #convert the logits to sequence logits to match expected output
        #(the time axis is axis 1 of the inputs)
        seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                               int(inputs.get_shape()[1]))

        #create a saver
        saver = tf.train.Saver()

        control_ops = None

    return seq_logits, seq_length, saver, control_ops
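# --- illustration, not part of the model code ---
# The shift-and-pad loop above builds the +/-11 frame context window by
# stacking time-shifted copies of the input along a new channel axis.
# A minimal numpy sketch of one such shift in each direction (toy sizes):

import numpy as np

x = np.arange(10, dtype=np.float32).reshape(1, 5, 2)  # 1 utterance, 5 frames

# shift one frame along the time axis (axis 1) and zero-pad the gap, like
# tf.pad(inputs[:, i + 1:, :], [[0, 0], [0, i + 1], [0, 0]]) with i = 0
forward = np.pad(x[:, 1:, :], [(0, 0), (0, 1), (0, 0)], mode='constant')
backward = np.pad(x[:, :-1, :], [(0, 0), (1, 0), (0, 0)], mode='constant')

# stack the original and the shifted copies as channels
windowed = np.stack([x, forward, backward], axis=3)
print(windowed.shape)  # (1, 5, 2, 3): [batch, time, features, context]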
def __call__(self, inputs, input_seq_length, targets=None,
             target_seq_length=None, is_training=False, reuse=False,
             scope=None):
    '''
    Add the neural net variables and operations to the graph

    Args:
        inputs: the inputs to the neural network, this is a
            [batch_size x max_input_length x feature_dim] tensor
        input_seq_length: The sequence lengths of the input utterances,
            this is a [batch_size] dimensional vector
        targets: the targets to the neural network, this is a
            [batch_size x max_output_length x 1] tensor. The targets can
            be used during training
        target_seq_length: The sequence lengths of the target utterances,
            this is a [batch_size] dimensional vector
        is_training: whether or not the network is in training mode
        reuse: whether or not the variables in the network should be reused
        scope: the name scope

    Returns:
        A quadruple containing:
            - output logits
            - the output logits sequence lengths as a vector
            - a saver object
            - a dictionary of control operations (may be empty)
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        #create the input layer
        inlayer = Conv1dlayer(self.num_units, self.kernel_size, 1)

        #create the gated convolutional layers
        dconv = GatedDilatedConvolution(self.kernel_size)

        #create the fully connected layer
        act = activation.TfActivation(None, tf.nn.relu)
        fflayer = FFLayer(self.num_units, act)

        #create the output layer
        act = activation.TfActivation(None, lambda x: x)
        outlayer = FFLayer(self.output_dim, act)

        #apply the input layer
        logits = 0
        forward = inlayer(inputs, is_training, reuse, 'inlayer')

        #apply the blocks of dilated convolution layers, summing the
        #highway (skip) connections of every layer into the logits
        for b in range(self.num_blocks):
            for l in range(self.num_layers):
                forward, highway = dconv(forward, 2**l, is_training, reuse,
                                         'dconv%d-%d' % (b, l))
                logits += highway

        #go to nonsequential data
        logits = seq_convertors.seq2nonseq(logits, input_seq_length)

        #apply the relu
        logits = tf.nn.relu(logits)

        #apply the fully connected layer
        logits = fflayer(logits, is_training, reuse, scope='FFlayer')

        #apply the output layer
        logits = outlayer(logits, is_training, reuse, scope='outlayer')

        #go back to sequential data
        logits = seq_convertors.nonseq2seq(logits, input_seq_length,
                                           int(inputs.get_shape()[1]))

        #create a saver
        saver = tf.train.Saver()

    return logits, input_seq_length, saver, None
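# --- illustration, not part of the model code ---
# With kernel size k and dilation rates 2**0 .. 2**(L-1) repeated over B
# blocks, each dilated layer widens the receptive field by (k-1)*2**l
# frames, so the stack (ignoring the input layer) sees
# 1 + B*(k-1)*(2**L - 1) frames. A small sanity-check helper:

def receptive_field(kernel_size, num_layers, num_blocks):
    """Receptive field (in frames) of num_blocks blocks of dilated
    convolutions with dilation rates 2**0 .. 2**(num_layers - 1)."""
    per_block = (kernel_size - 1) * (2 ** num_layers - 1)
    return 1 + num_blocks * per_block

print(receptive_field(3, 5, 2))  # e.g. 1 + 2 * 2 * 31 = 125 frames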
def train_NN(self, config, train_important_information,
             valid_important_information):

    ##########################
    ### DATASET
    ##########################

    train_data_dir = config.get('directories',
                                'exp_dir') + '/train_features_dir'
    valid_data_dir = config.get('directories',
                                'exp_dir') + '/valid_features_dir'

    NN_dir = config.get('directories', 'exp_dir') + '/NN_train_dir'
    if not os.path.isdir(NN_dir):
        os.mkdir(NN_dir)

    logdir = NN_dir + '/logdir'
    if not os.path.isdir(logdir):
        os.mkdir(logdir)

    ##########################
    ### SETTINGS
    ##########################

    # Hyperparameters
    initial_learning_rate = float(
        config.get('simple_NN', 'initial_learning_rate'))
    decay_steps = int(config.get('simple_NN', 'decay_steps'))
    decay_rate = float(config.get('simple_NN', 'decay_rate'))

    # Architecture
    n_hidden = int(config.get('simple_NN', 'n_hidden'))
    hid_layer_num = int(config.get('simple_NN', 'hidden_layer_num'))
    n_input = train_important_information['input_dim']
    training_epochs = int(config.get('simple_NN', 'training_epochs'))
    batch_size = int(config.get('simple_NN', 'train_batch_size'))
    valid_batch_total = valid_important_information['valid_batch_total']
    n_classes = train_important_information['num_labels']
    training_batch_total = train_important_information[
        'training_batch_total']
    max_input_length = train_important_information['train_utt_max_length']
    max_target_length = train_important_information[
        'train_label_max_length']

    ##########################
    ### GRAPH DEFINITION
    ##########################

    g = tf.Graph()
    with g.as_default():

        with tf.name_scope('input'):
            #create the inputs placeholder
            inputs = tf.placeholder(
                tf.float32,
                shape=[max_input_length, batch_size, n_input],
                name='features')

            #the length of all the input sequences
            input_seq_length = tf.placeholder(tf.int32,
                                              shape=[batch_size],
                                              name='input_seq_length')

            #split the 3D input tensor in a list of batch_size*input_dim
            #tensors
            split_inputs = tf.unstack(inputs,
                                      name='split_inputs_training_op')

            #convert the sequential data to non sequential data
            nonseq_inputs = seq_convertors.seq2nonseq(
                split_inputs, input_seq_length, name='inputs-processing')

        with tf.name_scope('target'):
            #reference labels
            targets = tf.placeholder(
                tf.int32,
                shape=[max_target_length, batch_size, 1],
                name='targets')

            #the length of all the output sequences
            target_seq_length = tf.placeholder(tf.int32,
                                               shape=[batch_size],
                                               name='output_seq_length')

        # Model parameters
        with tf.name_scope("weights"):
            weights = {
                'h' + str(i): tf.Variable(
                    tf.truncated_normal([n_hidden, n_hidden], stddev=0.1),
                    name="h" + str(i) + "_value")
                for i in range(2, hid_layer_num + 1)
            }
            weights['h1'] = tf.Variable(
                tf.truncated_normal([n_input, n_hidden], stddev=0.1),
                name="h1_value")
            weights['out'] = tf.Variable(
                tf.truncated_normal([n_hidden, n_classes], stddev=0.1),
                name="weight_out_value")

        with tf.name_scope("biases"):
            biases = {
                'b' + str(i): tf.Variable(tf.zeros([n_hidden]),
                                          name="b" + str(i) + "_value")
                for i in range(1, hid_layer_num + 1)
            }
            biases['out'] = tf.Variable(tf.zeros([n_classes]),
                                        name="bias_out_value")

        # Multilayer perceptron
        with tf.name_scope("layer-1"):
            layer_1 = tf.add(tf.matmul(nonseq_inputs, weights['h1']),
                             biases['b1'])
            layer_out = tf.nn.tanh(layer_1)

        for i in range(2, hid_layer_num + 1):
            with tf.name_scope("layer-" + str(i)):
                layer = tf.add(tf.matmul(layer_out, weights['h' + str(i)]),
                               biases['b' + str(i)])
                layer_out = tf.nn.tanh(layer)
                print("hidden layer " + str(i))

        with tf.name_scope("hid_out"):
            nonseq_logits = tf.add(tf.matmul(layer_out, weights['out']),
                                   biases['out'])

        with tf.name_scope("targets-processing"):
            #split the 3D targets tensor in a list of batch_size*1 tensors
            split_targets = tf.unstack(targets)

            nonseq_targets = seq_convertors.seq2nonseq(
                split_targets, target_seq_length,
                name="targets-processing")

            #make a vector out of the targets
            nonseq_targets = tf.reshape(nonseq_targets, [-1])

            #one hot encode the targets
            #pylint: disable=E1101
            end_nonseq_targets = tf.one_hot(
                nonseq_targets, int(nonseq_logits.get_shape()[1]))

        with tf.name_scope('soft_max'):
            # Loss and optimizer
            loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=nonseq_logits, labels=end_nonseq_targets)
            cost = tf.reduce_mean(loss, name='cost_op')

        with tf.name_scope('train'):
            global_step = tf.Variable(0, trainable=False)
            learning_rate = tf.train.exponential_decay(
                initial_learning_rate, global_step, decay_steps,
                decay_rate, staircase=True)
            #optimizer = tf.train.GradientDescentOptimizer(
            #    learning_rate=learning_rate)
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            train = optimizer.minimize(cost, global_step=global_step,
                                       name='train_op')

        with tf.name_scope('Accuracy'):
            # Prediction
            correct_prediction = tf.equal(tf.argmax(end_nonseq_targets, 1),
                                          tf.argmax(nonseq_logits, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                              tf.float32),
                                      name='accuracy_op')
            accuracy_valid = tf.reduce_mean(tf.cast(correct_prediction,
                                                    tf.float32),
                                            name='valid-accuracy_op')

        #create a summary for our cost and accuracy
        tf.summary.scalar("cost", cost)
        tf.summary.scalar("train-accuracy", accuracy)
        tf.summary.scalar("valid-accuracy", accuracy_valid)
        tf.summary.histogram('histogram-train-accuracy', accuracy)
        tf.summary.histogram('histogram-valid-accuracy', accuracy_valid)

        # merge all summaries into a single "operation" which we can
        # execute in a session
        summary_op = tf.summary.merge_all()

        saver = tf.train.Saver(max_to_keep=10000)

    ##########################
    ### TRAINING & EVALUATION
    ##########################

    #use a separate name for the session config so the ConfigParser
    #argument is not shadowed
    sess_config = tf.ConfigProto()
    #sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.95

    with tf.Session(graph=g, config=sess_config) as sess:

        sess.run(tf.global_variables_initializer())

        # create log writer object
        writer = tf.summary.FileWriter(logdir,
                                       graph=tf.get_default_graph())

        for epoch in range(training_epochs):
            avg_cost = 0.

            for i in range(training_batch_total):
                train_batch_x = np.load(gzip.GzipFile(
                    train_data_dir + '/batch_inputs_' + str(i) +
                    '.npy.gz', "r"))
                train_batch_y = np.load(gzip.GzipFile(
                    train_data_dir + '/batch_targets_' + str(i) +
                    '.npy.gz', "r"))
                train_input_seq_length = np.load(gzip.GzipFile(
                    train_data_dir + '/batch_input_seq_length_' + str(i) +
                    '.npy.gz', "r"))
                train_target_seq_length = np.load(gzip.GzipFile(
                    train_data_dir + '/batch_output_seq_length_' + str(i) +
                    '.npy.gz', "r"))

                # perform the operations we defined earlier on the batch
                _, c, summary = sess.run(
                    [train, cost, summary_op],
                    feed_dict={
                        inputs: train_batch_x,
                        targets: train_batch_y,
                        input_seq_length: train_input_seq_length,
                        target_seq_length: train_target_seq_length
                    })
                avg_cost += c

                # write log
                writer.add_summary(summary,
                                   epoch * training_batch_total + i)

            train_acc = 0
            for j in range(training_batch_total):
                train_x = np.load(gzip.GzipFile(
                    train_data_dir + '/batch_inputs_' + str(j) +
                    '.npy.gz', "r"))
                train_y = np.load(gzip.GzipFile(
                    train_data_dir + '/batch_targets_' + str(j) +
                    '.npy.gz', "r"))
                train_x_seq_length = np.load(gzip.GzipFile(
                    train_data_dir + '/batch_input_seq_length_' + str(j) +
                    '.npy.gz', "r"))
                train_y_seq_length = np.load(gzip.GzipFile(
                    train_data_dir + '/batch_output_seq_length_' + str(j) +
                    '.npy.gz', "r"))

                train_batch_acc = sess.run(
                    accuracy,
                    feed_dict={
                        inputs: train_x,
                        targets: train_y,
                        input_seq_length: train_x_seq_length,
                        target_seq_length: train_y_seq_length
                    })
                train_acc += train_batch_acc
                print("batch accuracy " + str(j))

            train_acc /= training_batch_total

            valid_acc = 0
            for j in range(valid_batch_total):
                validation_x = np.load(gzip.GzipFile(
                    valid_data_dir + '/batch_inputs_' + str(j) +
                    '.npy.gz', "r"))
                validation_y = np.load(gzip.GzipFile(
                    valid_data_dir + '/batch_targets_' + str(j) +
                    '.npy.gz', "r"))
                validation_x_seq_length = np.load(gzip.GzipFile(
                    valid_data_dir + '/batch_input_seq_length_' + str(j) +
                    '.npy.gz', "r"))
                validation_y_seq_length = np.load(gzip.GzipFile(
                    valid_data_dir + '/batch_output_seq_length_' + str(j) +
                    '.npy.gz', "r"))

                validation_batch_acc = sess.run(
                    accuracy_valid,
                    feed_dict={
                        inputs: validation_x,
                        targets: validation_y,
                        input_seq_length: validation_x_seq_length,
                        target_seq_length: validation_y_seq_length
                    })
                valid_acc += validation_batch_acc

            valid_acc /= valid_batch_total

            #print("Epoch: %03d | AvgCost: %.3f" %
            #      (epoch + 1, avg_cost / (i + 1)), end="")
            #print(" | Train/Valid ACC: %.3f/%.3f" % (train_acc, valid_acc))

            accuracy_log_file = open(logdir + '/accuracy_log', "a")
            accuracy_log_file.write("Epoch: %03d | AvgCost: %.3f" %
                                    (epoch + 1, avg_cost / (i + 1)))
            accuracy_log_file.write(" | Train/Valid ACC: %.3f/%.3f" %
                                    (train_acc, valid_acc) + '\n')
            accuracy_log_file.close()

            saver.save(sess, NN_dir + '/model.ckpt', global_step=epoch + 1)
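# --- illustration, not part of the trainer code ---
# seq2nonseq is used throughout to drop the zero padding before the
# frame-level layers: it keeps the first seq_length[i] frames of every
# utterance and concatenates them into one [total_frames x dim] matrix.
# A rough numpy equivalent, assuming the [time x batch x dim] layout that
# this trainer feeds in:

import numpy as np

def seq2nonseq_np(seq, seq_length):
    """Concatenate the valid (unpadded) frames of each sequence."""
    return np.concatenate(
        [seq[:length, b, :] for b, length in enumerate(seq_length)], axis=0)

batch = np.zeros((4, 2, 3), dtype=np.float32)  # 2 utterances, max 4 frames
batch[:3, 0, :] = 1.0                          # utterance 0: 3 real frames
batch[:2, 1, :] = 2.0                          # utterance 1: 2 real frames
print(seq2nonseq_np(batch, [3, 2]).shape)      # (5, 3)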
def decode_data(self, writer):

    self.retrieved_data()

    ##########################
    ### GRAPH DEFINITION
    ##########################

    g = tf.Graph()
    with g.as_default():

        decode_inputs = tf.placeholder(
            tf.float32,
            shape=[self.max_length, self.input_dim],
            name='decode_inputs')

        decode_seq_length = tf.placeholder(tf.int32, shape=[1],
                                           name='decode_seq_length')

        split_inputs = tf.unstack(tf.expand_dims(decode_inputs, 1),
                                  name="decode_split_inputs_op")

        nonseq_inputs = seq_convertors.seq2nonseq(split_inputs,
                                                  decode_seq_length)

        # Multilayer perceptron
        layer_1 = tf.add(tf.matmul(nonseq_inputs, self.weights_h1),
                         self.bias_b1)
        layer_1 = tf.nn.tanh(layer_1)

        layer_2 = tf.add(tf.matmul(layer_1, self.weights_h2), self.bias_b2)
        layer_2 = tf.nn.tanh(layer_2)

        logits = tf.add(tf.matmul(layer_2, self.weights_out),
                        self.bias_out, name="logits_op")

        seq_logits = seq_convertors.nonseq2seq(logits, decode_seq_length,
                                               len(split_inputs))

        decode_logits = seq_convertors.seq2nonseq(seq_logits,
                                                  decode_seq_length)

        outputs = tf.nn.softmax(decode_logits, name="final_operation")

    ##########################
    ### EVALUATION
    ##########################

    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9

    with tf.Session(graph=g, config=config) as sess:

        sess.run(tf.global_variables_initializer())

        for i in range(self.total_uttarences):
            utt_id = self.utt_id_list[i]
            utt_mat = self.utt_dict[utt_id]
            input_seq_length = [utt_mat.shape[0]]

            #pad the inputs
            utt_mat = np.append(
                utt_mat,
                np.zeros([self.max_length - utt_mat.shape[0],
                          utt_mat.shape[1]]), 0)

            outputs_value = sess.run(
                'final_operation:0',
                feed_dict={
                    'decode_inputs:0': utt_mat,
                    'decode_seq_length:0': input_seq_length
                })

            #get state likelihoods by dividing by the prior
            output = outputs_value / self.prior

            #floor the values to avoid problems with log
            #(np.where returns a new array, so its result must be kept)
            output = np.where(output == 0, np.finfo(float).eps, output)

            #write the pseudo-likelihoods in kaldi feature format
            writer.write_next_utt(utt_id, np.log(output))

    #close the writer
    writer.close()
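# --- illustration, not part of the decoder code ---
# Dividing the softmax outputs by the class priors is the usual hybrid-ASR
# conversion from posteriors to scaled likelihoods, p(x|s) ~ p(s|x) / p(s);
# flooring with eps avoids log(0). The arithmetic in isolation (toy values):

import numpy as np

posteriors = np.array([[0.7, 0.3, 0.0]])  # softmax output for one frame
prior = np.array([0.5, 0.4, 0.1])         # state priors from training labels

likelihoods = posteriors / prior
likelihoods = np.where(likelihoods == 0, np.finfo(float).eps, likelihoods)
log_likelihoods = np.log(likelihoods)     # what gets written out for kaldi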
def __call__(self, inputs, seq_length, is_training=False, reuse=False,
             scope=None):
    '''
    Add the DNN variables and operations to the graph

    Args:
        inputs: the inputs to the neural network, this is a list containing
            a [batch_size, input_dim] tensor for each time step
        seq_length: The sequence lengths of the input utterances, if None
            the maximal sequence length will be taken
        is_training: whether or not the network is in training mode
        reuse: whether or not the variables in the network should be reused
        scope: the name scope

    Returns:
        A quadruple containing:
            - output logits
            - the output logits sequence lengths as a vector
            - a saver object
            - a dictionary of control operations:
                - add: add a layer to the network
                - init: initialise the final layer
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        #input layer
        layer = FFLayer(self.num_units, self.activation)

        #output layer
        outlayer = FFLayer(self.output_dim,
                           TfActivation(None, lambda x: x), 0)

        #do the forward computation

        #convert the sequential data to non sequential data
        nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

        activations = [None] * self.num_layers
        activations[0] = layer(nonseq_inputs, is_training, reuse, 'layer0')
        for l in range(1, self.num_layers):
            activations[l] = layer(activations[l - 1], is_training, reuse,
                                   'layer' + str(l))

        if self.layerwise_init:

            #variable that determines how many layers are initialised
            #in the neural net
            initialisedlayers = tf.get_variable(
                'initialisedlayers', [],
                initializer=tf.constant_initializer(0),
                trainable=False,
                dtype=tf.int32)

            #operation to increment the number of layers
            add_layer_op = initialisedlayers.assign(
                initialisedlayers + 1).op

            #compute the logits by selecting the activations at the layer
            #that has last been added to the network, this is used for
            #layer by layer initialisation
            logits = tf.case(
                [(tf.equal(initialisedlayers, tf.constant(l)),
                  Callable(activations[l]))
                 for l in range(len(activations))],
                default=Callable(activations[-1]),
                exclusive=True, name='layerSelector')
            logits.set_shape([None, self.num_units])
        else:
            logits = activations[-1]

        logits = outlayer(logits, is_training, reuse,
                          'layer' + str(self.num_layers))

        if self.layerwise_init:
            #operation to initialise the final layer
            #(tf.variables_initializer is the non-deprecated spelling of
            #tf.initialize_variables)
            init_last_layer_op = tf.variables_initializer(
                tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES,
                    scope=(tf.get_variable_scope().name + '/layer' +
                           str(self.num_layers))))

            control_ops = {'add': add_layer_op, 'init': init_last_layer_op}
        else:
            control_ops = None

        #convert the logits to sequence logits to match expected output
        seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                               len(inputs))

        #create a saver
        saver = tf.train.Saver()

    return seq_logits, seq_length, saver, control_ops
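# --- illustration, not part of the model code ---
# tf.case expects zero-argument callables rather than plain tensors, which
# is presumably what the Callable helper used above provides. A minimal
# sketch of such a wrapper (an assumption; the project's own definition
# may differ):

class Callable(object):
    """Wrap a tensor so tf.case can invoke it as a zero-argument function."""

    def __init__(self, tensor):
        self.tensor = tensor

    def __call__(self):
        return self.tensor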
def train_NN(self, config, train_important_information,
             valid_important_information):

    ##########################
    ### DATASET
    ##########################

    train_data_dir = config.get('directories',
                                'exp_dir') + '/train_features_dir'
    valid_data_dir = config.get('directories',
                                'exp_dir') + '/valid_features_dir'

    NN_dir = config.get('directories',
                        'exp_dir') + '/NN_train_dir_combined_acc_loss'
    if not os.path.isdir(NN_dir):
        os.mkdir(NN_dir)

    logdir = NN_dir + '/logdir'
    if not os.path.isdir(logdir):
        os.mkdir(logdir)

    ##########################
    ### SETTINGS
    ##########################

    # Hyperparameters
    changed_learning_rate = float(
        config.get('simple_NN', 'initial_learning_rate'))
    #decay_steps = int(config.get('simple_NN', 'decay_steps'))
    #we use num_steps as the decay horizon instead of decay_steps
    decay_rate = float(config.get('simple_NN', 'decay_rate'))

    # Architecture
    n_hidden = int(config.get('simple_NN', 'n_hidden'))
    hid_layer_num = int(config.get('simple_NN', 'hidden_layer_num'))
    n_input = train_important_information['input_dim']
    training_epochs = int(config.get('simple_NN', 'training_epochs'))
    batch_size = int(config.get('simple_NN', 'train_batch_size'))
    valid_batch_total = valid_important_information['valid_batch_total']
    n_classes = train_important_information['num_labels']
    training_batch_total = train_important_information[
        'training_batch_total']
    max_input_length = train_important_information['train_utt_max_length']
    max_target_length = train_important_information[
        'train_label_max_length']

    num_steps = training_epochs * training_batch_total
    valid_frequency = training_batch_total  #validate after each epoch
    total_number_of_retries = 3

    ##########################
    ### GRAPH DEFINITION
    ##########################

    g = tf.Graph()
    with g.as_default():

        #placeholders that allow the learning rate to be halved during
        #training
        initial_learning_rate = tf.placeholder(tf.float32, None,
                                               name='initial_l_rate')
        learning_rate_factor = tf.placeholder(tf.float32, None,
                                              name='factor_value')

        with tf.name_scope('input'):
            #create the inputs placeholder
            inputs = tf.placeholder(
                tf.float32,
                shape=[max_input_length, batch_size, n_input],
                name='features')

            #the length of all the input sequences
            input_seq_length = tf.placeholder(tf.int32,
                                              shape=[batch_size],
                                              name='input_seq_length')

            #split the 3D input tensor in a list of batch_size*input_dim
            #tensors
            split_inputs = tf.unstack(inputs,
                                      name='split_inputs_training_op')

            #convert the sequential data to non sequential data
            nonseq_inputs = seq_convertors.seq2nonseq(
                split_inputs, input_seq_length, name='inputs-processing')

        with tf.name_scope('target'):
            #reference labels
            targets = tf.placeholder(
                tf.int32,
                shape=[max_target_length, batch_size, 1],
                name='targets')

            #the length of all the output sequences
            target_seq_length = tf.placeholder(tf.int32,
                                               shape=[batch_size],
                                               name='output_seq_length')

        # Model parameters
        with tf.name_scope("weights"):
            weights = {
                'h' + str(i): tf.Variable(
                    tf.truncated_normal([n_hidden, n_hidden], stddev=0.1),
                    name="h" + str(i) + "_value")
                for i in range(2, hid_layer_num + 1)
            }
            weights['h1'] = tf.Variable(
                tf.truncated_normal([n_input, n_hidden], stddev=0.1),
                name="h1_value")
            weights['out'] = tf.Variable(
                tf.truncated_normal([n_hidden, n_classes], stddev=0.1),
                name="weight_out_value")

        with tf.name_scope("biases"):
            biases = {
                'b' + str(i): tf.Variable(tf.zeros([n_hidden]),
                                          name="b" + str(i) + "_value")
                for i in range(1, hid_layer_num + 1)
            }
            biases['out'] = tf.Variable(tf.zeros([n_classes]),
                                        name="bias_out_value")

        # Multilayer perceptron
        with tf.name_scope("layer-1"):
            layer_1 = tf.add(tf.matmul(nonseq_inputs, weights['h1']),
                             biases['b1'])
            layer_out = tf.nn.tanh(layer_1)

        for i in range(2, hid_layer_num + 1):
            with tf.name_scope("layer-" + str(i)):
                layer = tf.add(tf.matmul(layer_out, weights['h' + str(i)]),
                               biases['b' + str(i)])
                layer_out = tf.nn.tanh(layer)

        with tf.name_scope("hid_out"):
            nonseq_logits = tf.add(tf.matmul(layer_out, weights['out']),
                                   biases['out'])

        with tf.name_scope("targets-processing"):
            #split the 3D targets tensor in a list of batch_size*1 tensors
            split_targets = tf.unstack(targets)

            nonseq_targets = seq_convertors.seq2nonseq(
                split_targets, target_seq_length,
                name="targets-processing")

            #make a vector out of the targets
            nonseq_targets = tf.reshape(nonseq_targets, [-1])

            #one hot encode the targets
            #pylint: disable=E1101
            end_nonseq_targets = tf.one_hot(
                nonseq_targets, int(nonseq_logits.get_shape()[1]))

        with tf.name_scope('soft_max'):
            # Loss and optimizer
            validation_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=nonseq_logits, labels=end_nonseq_targets)
            validation_cost = tf.reduce_mean(validation_loss,
                                             name='validation_cost_op')

            train_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=nonseq_logits, labels=end_nonseq_targets)
            train_cost = tf.reduce_mean(train_loss, name='train_cost_op')

        with tf.name_scope('train'):
            global_step = tf.Variable(0, trainable=False)
            learning_rate = tf.train.exponential_decay(
                initial_learning_rate, global_step, num_steps, decay_rate,
                staircase=True) * learning_rate_factor
            #optimizer = tf.train.GradientDescentOptimizer(
            #    learning_rate=learning_rate)
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            train = optimizer.minimize(train_cost, global_step=global_step,
                                       name='train_op')

        with tf.name_scope('Accuracy'):
            # Prediction
            correct_prediction = tf.equal(tf.argmax(end_nonseq_targets, 1),
                                          tf.argmax(nonseq_logits, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                              tf.float32),
                                      name='accuracy_op')
            accuracy_valid = tf.reduce_mean(tf.cast(correct_prediction,
                                                    tf.float32),
                                            name='valid-accuracy_op')

        #create a summary for our cost and accuracy
        tf.summary.scalar("Train Loss", train_cost)
        tf.summary.scalar("Validation Loss", validation_cost)
        tf.summary.scalar("Training Accuracy", accuracy)
        tf.summary.scalar("Validation Accuracy", accuracy_valid)

        # merge all summaries into a single "operation" which we can
        # execute in a session
        summary_op = tf.summary.merge_all()

        saver = tf.train.Saver(max_to_keep=10000)

    ##########################
    ### TRAINING & EVALUATION
    ##########################

    #use a separate name for the session config so the ConfigParser
    #argument is not shadowed
    sess_config = tf.ConfigProto()
    #sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9

    with tf.Session(graph=g, config=sess_config) as sess:

        sess.run(tf.global_variables_initializer())

        # create log writer object
        writer = tf.summary.FileWriter(logdir,
                                       graph=tf.get_default_graph())

        step = 0
        epoch = 0
        #best validation loss/accuracy so far; the loss starts arbitrarily
        #high (these Python values shadow the graph tensors of the same
        #name, which is harmless since the cost ops are already captured)
        validation_loss = 100
        validation_accuracy = 0
        print("First validation loss: " + str(validation_loss))
        print("First validation accuracy: " + str(validation_accuracy) +
              "\n")

        validation_step = step
        num_retries = 0
        train_batch_number = 1
        train_file = 0
        factor = 1.0
        train_acc = 0

        while step < num_steps:
            train_batch_x = np.load(gzip.GzipFile(
                train_data_dir + '/batch_inputs_' + str(train_file) +
                '.npy.gz', "r"))
            train_batch_y = np.load(gzip.GzipFile(
                train_data_dir + '/batch_targets_' + str(train_file) +
                '.npy.gz', "r"))
            train_input_seq_length = np.load(gzip.GzipFile(
                train_data_dir + '/batch_input_seq_length_' +
                str(train_file) + '.npy.gz', "r"))
            train_target_seq_length = np.load(gzip.GzipFile(
                train_data_dir + '/batch_output_seq_length_' +
                str(train_file) + '.npy.gz', "r"))

            learning_rate_value, _, loss, train_batch_acc, summary = \
                sess.run(
                    [learning_rate, train, train_cost, accuracy,
                     summary_op],
                    feed_dict={
                        inputs: train_batch_x,
                        targets: train_batch_y,
                        input_seq_length: train_input_seq_length,
                        target_seq_length: train_target_seq_length,
                        learning_rate_factor: factor,
                        initial_learning_rate: changed_learning_rate
                    })

            changed_learning_rate = learning_rate_value
            train_acc += train_batch_acc

            #the halving factor is only applied for a single step
            if factor == 0.5:
                factor = 1.0

            #print("Step number: " + str(step + 1) +
            #      " Training Batch Number: " + str(train_file + 1) +
            #      " Learning Rate: " + str(learning_rate_value))

            train_batch_number = train_batch_number + 1
            train_file = (train_batch_number % training_batch_total) - 1
            if train_file == -1:
                train_file = training_batch_total - 1

            # write log to display in tensorboard
            writer.add_summary(summary, train_batch_number)

            step = step + 1

            if step % valid_frequency == 0:
                epoch = train_batch_number // training_batch_total

                sum_batch_current_loss = 0
                valid_acc = 0
                for valid_file in range(valid_batch_total):
                    validation_x = np.load(gzip.GzipFile(
                        valid_data_dir + '/batch_inputs_' +
                        str(valid_file) + '.npy.gz', "r"))
                    validation_y = np.load(gzip.GzipFile(
                        valid_data_dir + '/batch_targets_' +
                        str(valid_file) + '.npy.gz', "r"))
                    validation_x_seq_length = np.load(gzip.GzipFile(
                        valid_data_dir + '/batch_input_seq_length_' +
                        str(valid_file) + '.npy.gz', "r"))
                    validation_y_seq_length = np.load(gzip.GzipFile(
                        valid_data_dir + '/batch_output_seq_length_' +
                        str(valid_file) + '.npy.gz', "r"))

                    loss, validation_batch_acc, summary = sess.run(
                        [validation_cost, accuracy_valid, summary_op],
                        feed_dict={
                            inputs: validation_x,
                            targets: validation_y,
                            input_seq_length: validation_x_seq_length,
                            target_seq_length: validation_y_seq_length
                        })
                    sum_batch_current_loss += loss
                    valid_acc += validation_batch_acc

                current_loss = sum_batch_current_loss / valid_batch_total
                valid_acc /= valid_batch_total

                #only compare accuracies rounded to 3 decimal points
                current_validation_accuracy = float(format(valid_acc,
                                                           '.3f'))

                train_acc /= training_batch_total

                # write the accuracy information to a log file
                accuracy_log_file = open(logdir + '/accuracy_log', "a")
                print("\nEpoch: %03d Train/Valid Accuracy: %.3f/%.3f\n" %
                      (epoch, train_acc, valid_acc))
                accuracy_log_file.write(
                    "Epoch: %03d | Learning Rate: %f | "
                    "Train/Valid ACC: %.3f/%.3f" %
                    (epoch, learning_rate_value, train_acc, valid_acc) +
                    "\n")
                accuracy_log_file.close()
                train_acc = 0

                if (current_loss >= validation_loss or
                        current_validation_accuracy <=
                        validation_accuracy):
                    print("Halving the learning rate, current_loss: " +
                          str(current_loss) + " validation_loss: " +
                          str(validation_loss))
                    print("Epoch: " + str(epoch) + " Step number: " +
                          str(step + 1) + " Training Batch Number: " +
                          str(train_file + 1) + " New Learning Rate: " +
                          str(learning_rate_value * .5))

                    factor = 0.5
                    #rewind the step counter to the last validated point
                    #(note: the model weights themselves are not restored)
                    step = validation_step
                    validation_accuracy = current_validation_accuracy
                    num_retries = num_retries + 1
                    print("Number of Retries: " + str(num_retries) + "\n")

                    if num_retries == total_number_of_retries:
                        saver.save(sess, NN_dir + '/model.ckpt',
                                   global_step=train_batch_number - 1)
                        save_batch_file = open(
                            NN_dir + '/save_batch_number', "w")
                        save_batch_file.write(str(train_batch_number - 1))
                        save_batch_file.close()
                        print("Number of retries reached the maximum, "
                              "finishing training the model")
                        break
                    continue
                else:
                    print("Keeping the learning rate, current_loss: " +
                          str(current_loss) + " validation_loss: " +
                          str(validation_loss))
                    print("Epoch: " + str(epoch) + " Step number: " +
                          str(step + 1) + " Training Batch Number: " +
                          str(train_file + 1) + " New Learning Rate: " +
                          str(learning_rate_value) + "\n")
                    factor = 1.0
                    validation_loss = current_loss
                    validation_accuracy = current_validation_accuracy
                    validation_step = step
                    num_retries = 0

        if step == num_steps:
            saver.save(sess, NN_dir + '/model.ckpt',
                       global_step=train_batch_number - 1)
            save_batch_file = open(NN_dir + '/save_batch_number', "w")
            save_batch_file.write(str(train_batch_number - 1))
            save_batch_file.close()
def __call__(self, inputs, input_seq_length, targets=None,
             target_seq_length=None, is_training=False, reuse=False,
             scope=None):
    '''
    Add the neural net variables and operations to the graph

    Args:
        inputs: the inputs to the neural network, this is a
            [batch_size x max_input_length x feature_dim] tensor
        input_seq_length: The sequence lengths of the input utterances,
            this is a [batch_size] dimensional vector
        targets: the targets to the neural network, this is a
            [batch_size x max_output_length x 1] tensor. The targets can
            be used during training
        target_seq_length: The sequence lengths of the target utterances,
            this is a [batch_size] dimensional vector
        is_training: whether or not the network is in training mode
        reuse: whether or not the variables in the network should be reused
        scope: the name scope

    Returns:
        A quadruple containing:
            - output logits
            - the output logits sequence lengths as a vector
            - a saver object
            - a dictionary of control operations (may be empty)
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        #the blstm layer
        blstm = BLSTMLayer(self.num_units)

        #the linear output layer
        outlayer = FFLayer(self.output_dim,
                           TfActivation(None, lambda x: x), 0)

        #do the forward computation

        #add gaussian noise to the inputs during training as a regularizer
        #(the static input shape must be fully defined for random_normal)
        if is_training:
            logits = inputs + tf.random_normal(inputs.get_shape(),
                                               stddev=0.6)
        else:
            logits = inputs

        for layer in range(self.num_layers):
            logits = blstm(logits, input_seq_length, is_training, reuse,
                           'layer' + str(layer))
            logits = self.activation(logits, is_training, reuse)

        logits = seq_convertors.seq2nonseq(logits, input_seq_length)

        logits = outlayer(logits, is_training, reuse, 'outlayer')

        logits = seq_convertors.nonseq2seq(logits, input_seq_length,
                                           int(inputs.get_shape()[1]))

        #create a saver
        saver = tf.train.Saver()

    return logits, input_seq_length, saver, None
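# --- illustration, not part of the model code ---
# Because is_training is a Python bool at graph-construction time, a plain
# if statement is enough to apply the Gaussian input noise only during
# training (no tf.cond is needed). The same regularizer in numpy terms:

import numpy as np

def add_input_noise(features, is_training, stddev=0.6, rng=np.random):
    """Zero-mean Gaussian input noise, applied only in training mode."""
    if not is_training:
        return features
    return features + rng.normal(scale=stddev, size=features.shape)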
def decode_data(self, writer):

    self.retrieved_data()

    ##########################
    ### GRAPH DEFINITION
    ##########################

    g = tf.Graph()
    with g.as_default():

        decode_inputs = tf.placeholder(
            tf.float32,
            shape=[self.max_length, self.input_dim],
            name='inputs')

        decode_seq_length = tf.placeholder(tf.int32, shape=[1],
                                           name='seq_length')

        split_inputs = tf.unstack(tf.expand_dims(decode_inputs, 1),
                                  name="decode_split_inputs_op")

        nonseq_inputs = seq_convertors.seq2nonseq(split_inputs,
                                                  decode_seq_length)

        #lay every spliced frame out as a 7x13x1 image for the
        #2-D convolutions
        inputs_img = tf.reshape(
            nonseq_inputs,
            tf.stack([tf.shape(nonseq_inputs)[0], 7, 1, 13]))
        inputs_img = tf.transpose(inputs_img, [0, 1, 3, 2])
        print('Input Img: ')
        print(inputs_img.get_shape().as_list())

        hidden_1 = self.convolution(inputs_img, self.conv1_weights,
                                    self.conv1_biases)

        pool = tf.nn.max_pool(hidden_1, ksize=[1, 3, 1, 1],
                              strides=[1, 1, 1, 1], padding='VALID')
        print('pool_l1: ')
        print(pool.get_shape().as_list())

        hidden_2 = self.convolution(pool, self.conv2_weights,
                                    self.conv2_biases)

        shape = hidden_2.get_shape().as_list()
        conv_outputs = tf.reshape(
            hidden_2,
            tf.stack([tf.shape(hidden_2)[0],
                      shape[1] * shape[2] * shape[3]]))
        print('Outputs: ')
        print(conv_outputs.get_shape().as_list())

        # Multilayer perceptron
        layer_1 = tf.add(tf.matmul(conv_outputs, self.weights['h1']),
                         self.biases['b1'])
        layer_out = tf.nn.tanh(layer_1)

        for i in range(2, self.hid_layer_num + 1):
            layer = tf.add(tf.matmul(layer_out,
                                     self.weights['h' + str(i)]),
                           self.biases['b' + str(i)])
            layer_out = tf.nn.tanh(layer)

        logits = tf.add(tf.matmul(layer_out, self.weights['out']),
                        self.biases['out'])

        outputs = tf.nn.softmax(logits, name="final_operation")

    ##########################
    ### EVALUATION
    ##########################

    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9

    with tf.Session(graph=g, config=config) as sess:

        sess.run(tf.global_variables_initializer())

        for i in range(self.total_uttarences):
            utt_id = self.utt_id_list[i]
            utt_mat = self.utt_dict[utt_id]
            input_seq_length = [utt_mat.shape[0]]

            #pad the inputs
            utt_mat = np.append(
                utt_mat,
                np.zeros([self.max_length - utt_mat.shape[0],
                          utt_mat.shape[1]]), 0)

            outputs_value = sess.run(
                outputs,
                feed_dict={
                    decode_inputs: utt_mat,
                    decode_seq_length: input_seq_length
                })

            print(str(i + 1) + " " + str(self.total_uttarences) + " " +
                  str(utt_id) + " " + str(outputs_value.shape))

            #get state likelihoods by dividing by the prior
            output = outputs_value / self.prior

            #floor the values to avoid problems with log
            output = np.where(output == 0, np.finfo(float).eps, output)

            #write the pseudo-likelihoods in kaldi feature format
            writer.write_next_utt(utt_id, np.log(output))

    #close the writer
    writer.close()
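# --- illustration, not part of the decoder code ---
# The reshape/transpose above assumes every spliced input frame is a
# 7-frame context window of 13-dimensional features (7 * 13 = 91 values),
# laid out as a [frames x 7 x 13 x 1] image for the 2-D convolutions.
# A numpy check of that layout (toy values):

import numpy as np

frames = np.arange(2 * 91, dtype=np.float32).reshape(2, 91)

# reshape to [frames, 7, 1, 13], then swap the last two axes
img = frames.reshape(2, 7, 1, 13).transpose(0, 1, 3, 2)
print(img.shape)  # (2, 7, 13, 1): height 7 (time), width 13 (features)
print(np.allclose(img[:, :, :, 0], frames.reshape(2, 7, 13)))  # True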
def __call__(self, inputs, seq_length, is_training=False, reuse=False,
             scope=None):
    '''
    Add the LSTM variables and operations to the graph

    Args:
        inputs: the inputs to the neural network, this is a list containing
            a [batch_size, input_dim] tensor for each time step
        seq_length: The sequence lengths of the input utterances, if None
            the maximal sequence length will be taken
        is_training: whether or not the network is in training mode
        reuse: whether or not the variables in the network should be reused
        scope: the name scope

    Returns:
        A quadruple containing:
            - output logits
            - the output logits sequence lengths as a vector
            - a saver object
            - a dictionary of control operations (None here)
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        weights = {
            'out': tf.get_variable(
                'weights_out', [self.num_units, self.output_dim],
                initializer=tf.contrib.layers.xavier_initializer())
        }

        biases = {
            'out': tf.get_variable(
                'biases_out', [self.output_dim],
                initializer=tf.constant_initializer(0))
        }

        #convert the sequential data to non sequential data
        nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

        #every spliced frame is treated as a sequence of 11 time steps of
        #40-dimensional features (hard-coded context window)
        nonseq_inputs = tf.reshape(nonseq_inputs, [-1, 11, 40])
        n_steps = 11
        nonseq_inputs = tf.transpose(nonseq_inputs, [1, 0, 2])

        keep_prob = 1

        # define the lstm cell, with dropout in training mode
        def make_cell():
            if is_training and keep_prob < 1:
                return tf.contrib.rnn.LayerNormBasicLSTMCell(
                    self.num_units, forget_bias=0.0, input_size=None,
                    activation=tf.nn.relu, layer_norm=False,
                    norm_gain=1.0, norm_shift=0.0,
                    dropout_keep_prob=keep_prob, dropout_prob_seed=None)
            return tf.contrib.rnn.LayerNormBasicLSTMCell(
                self.num_units, forget_bias=0.0, input_size=None,
                activation=tf.nn.relu, layer_norm=False,
                norm_gain=1.0, norm_shift=0.0,
                dropout_keep_prob=1, dropout_prob_seed=None)

        # stack the lstm cells to form multiple layers; a fresh cell is
        # created per layer instead of reusing a single cell object
        cell = tf.contrib.rnn.MultiRNNCell(
            [make_cell() for _ in range(self.num_layers)],
            state_is_tuple=True)

        # apply dropout to the inputs of the first hidden layer
        if is_training and keep_prob < 1:
            nonseq_inputs = tf.nn.dropout(nonseq_inputs, keep_prob)

        final_nonseq_inputs = tf.unstack(nonseq_inputs, num=n_steps,
                                         axis=0)

        # get the lstm cell outputs; the default zero initial state is used
        outputs, states = tf.contrib.rnn.static_rnn(cell,
                                                    final_nonseq_inputs,
                                                    dtype=tf.float32)
        outputs = outputs[-1]

        # linear activation, using the last output of the rnn inner loop
        logits = tf.matmul(outputs, weights['out']) + biases['out']

        #layer-wise initialisation (as in the DNN model) is not
        #implemented for this LSTM architecture
        if self.layerwise_init:
            raise NotImplementedError(
                'layerwise_init is not supported for the LSTM model')
        control_ops = None

        #convert the logits to sequence logits to match expected output
        seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                               len(inputs))

        #create a saver
        saver = tf.train.Saver()

    return seq_logits, seq_length, saver, control_ops
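# --- illustration, not part of the model code ---
# static_rnn consumes a Python list of n_steps tensors of shape
# [batch, input_dim]; the reshape/transpose/unstack above turns every
# spliced 440-dimensional frame (11 x 40) into such a list, of which only
# the last step's output is kept. In numpy terms:

import numpy as np

frames = np.zeros((3, 440), dtype=np.float32)          # 3 spliced frames
steps = frames.reshape(-1, 11, 40).transpose(1, 0, 2)  # time major: [11, 3, 40]
step_list = [steps[t] for t in range(11)]              # 11 arrays of [3, 40]
print(step_list[-1].shape)                             # (3, 40): the last step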
def __call__(self, inputs, seq_length, is_training=False, reuse=False,
             scope=None):
    '''
    Add the DNN variables and operations to the graph

    Args:
        inputs: the inputs to the neural network, this is a list containing
            a [batch_size, input_dim] tensor for each time step
        seq_length: The sequence lengths of the input utterances, if None
            the maximal sequence length will be taken
        is_training: whether or not the network is in training mode
        reuse: whether or not the variables in the network should be reused
        scope: the name scope

    Returns:
        A quadruple containing:
            - output logits
            - the output logits sequence lengths as a vector
            - a saver object
            - a dictionary of control operations:
                - add: add a layer to the network
                - init: initialise the final layer
    '''

    with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

        #input layer
        layer = FFLayer(self.num_units, self.activation)

        #output layer
        outlayer = FFLayer(self.output_dim,
                           TfActivation(None, lambda x: x), 0)

        #convert the sequential data to non sequential data
        ## to use the pure dnn, uncomment this line
        #nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

        activations = [None] * self.num_layers

        # Define the first hidden layer:
        # either a convolutional front-end ...
        #cnn_layer = RestNet()
        #cnn_layer = CnnVd6()
        if self.cnn_type == 1:
            print('------The CNN Config------')
            #convert the sequential data to non sequential data
            nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)
            cnn_layer = CnnLayer(self.cnn_conf)
            activations[0] = cnn_layer(nonseq_inputs, is_training, reuse,
                                       'layer0')
        else:
            print("Not using CNN")

        # ... or an lstm front-end, type 1
        if self.lstm_type == 1:
            print('------The LSTM Config------')
            #convert the sequential data to non sequential data:
            #inputs is a time-step list (e.g. of length 777) of 2-D
            #[batch_size x feature_dim] tensors; nonseq_inputs is a single
            #2-D [total_frames x feature_dim] tensor, where
            #total_frames = batch_size x time
            nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)
            print('Type1: the lstm data is processed like in the dnn, '
                  'frames are stacked and the output state is not reused')
            lstm_layer = LSTMLayer(self.lstm_conf)
            activations[0] = lstm_layer(nonseq_inputs, is_training, reuse,
                                        'layer0')

        ## the lstm layer, type 2
        elif self.lstm_type == 2:
            print('------The LSTM Config------')
            print('Type2: the lstm data processing is fully sequential')
            # here we directly use the sequential data (the inputs)
            lstm_layer = LSTMLayer2(self.lstm_conf2)
            # the dynamic lstm's output has the format:
            # time x batch_size x feature_dim
            seq_output = lstm_layer(inputs, seq_length, is_training, reuse,
                                    'layer0')
            # to connect the dnn, transform the sequential output to
            # non-sequential data so it can be fed directly to the dnn
            activations[0] = seq_convertors.seq2nonseq(seq_output,
                                                       seq_length)

        ## the lstm layer, type 3
        elif self.lstm_type == 3:
            print('------The LSTM Config------')
            print('Type3: the lstm data is processed in sub-sequences')
            # here we directly use the sequential data (the inputs)
            lstm_layer = LSTMLayer3(self.lstm_conf3,
                                    self.max_input_length)
            # the dynamic lstm's output has the format:
            # time x batch_size x feature_dim
            seq_output = lstm_layer(inputs, seq_length, is_training, reuse,
                                    'layer0')
            # to connect the dnn, transform the sequential output to
            # non-sequential data so it can be fed directly to the dnn
            # Note: the first index of seq_output must correspond to
            # seq_length, i.e. shape [seq_length, batch_size, output_dim]
            activations[0] = seq_convertors.seq2nonseq(seq_output,
                                                       seq_length)
        else:
            print("Not using LSTM")

        # define the fully connected (FL) hidden layers
        print('------The DNN Config------')
        print("use %d FL hidden layers" % self.FL_num_layers)
        for l in range(1, self.num_layers):
            print("the " + str(l) + " layer's input is: " +
                  str(activations[l - 1].shape))
            activations[l] = layer(activations[l - 1], is_training, reuse,
                                   'layer' + str(l))

        if self.layerwise_init:

            #variable that determines how many layers are initialised
            #in the neural net
            initialisedlayers = tf.get_variable(
                'initialisedlayers', [],
                initializer=tf.constant_initializer(0),
                trainable=False,
                dtype=tf.int32)

            #operation to increment the number of layers
            add_layer_op = initialisedlayers.assign(
                initialisedlayers + 1).op

            #compute the logits by selecting the activations at the layer
            #that has last been added to the network, this is used for
            #layer by layer initialisation
            logits = tf.case(
                [(tf.equal(initialisedlayers, tf.constant(l)),
                  Callable(activations[l]))
                 for l in range(len(activations))],
                default=Callable(activations[-1]),
                exclusive=True, name='layerSelector')
            logits.set_shape([None, self.num_units])
        else:
            logits = activations[-1]

        logits = outlayer(logits, is_training, reuse,
                          'layer' + str(self.num_layers))

        if self.layerwise_init:
            #operation to initialise the final layer; the scope must match
            #the output-layer scope used above
            init_last_layer_op = tf.variables_initializer(
                tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES,
                    scope=(tf.get_variable_scope().name + '/layer' +
                           str(self.num_layers))))

            control_ops = {'add': add_layer_op, 'init': init_last_layer_op}
        else:
            control_ops = None

        #convert the logits to sequence logits to match expected output
        seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                               len(inputs))

        #create a saver
        saver = tf.train.Saver()

    return seq_logits, seq_length, saver, control_ops