def add_tensorboard(self, session, tensorboard_dir, timeline_enabled=False):
    """
    Register the TensorBoard summary operations of the acoustic RNN

    After this call the following attributes are available
        self.train_summaries_op : merged summary op for a training step
        self.test_summaries_op : merged summary op for a test step
        self.summary_writer_op : writer storing the summaries on disk
                                 (only created when self.is_ditributed is falsy)

    Parameters
    ----------
    :param session: the tensorflow session (its graph is handed to the writer)
    :param tensorboard_dir: path to tensorboard directory
    :param timeline_enabled: enable the output of a trace file for timeline visualization
                             (only stored on the instance here)
    """
    self.timeline_enabled = timeline_enabled
    self.tensorboard_dir = tensorboard_dir

    # Two distinct GraphKeys instances serve as private collection keys, so
    # training and test summaries can be merged independently of each other.
    train_key = tf.GraphKeys()
    test_key = tf.GraphKeys()
    per_phase = (('Training', train_key), ('Test', test_key))

    # Learning rate is reported in both phases
    tf.summary.scalar('Learning_rate', self.learning_rate_var,
                      collections=[train_key, test_key])

    # Loss, averaged over the accumulated mini batches
    with tf.name_scope('Mean_loss'):
        averaged_loss = tf.divide(self.accumulated_mean_loss, self.mini_batch)
        for phase, key in per_phase:
            tf.summary.scalar(phase, averaged_loss, collections=[key])

    # Error rate, averaged over the accumulated mini batches
    with tf.name_scope('Accuracy_-_Error_Rate'):
        averaged_error_rate = tf.divide(self.accumulated_error_rate, self.mini_batch)
        for phase, key in per_phase:
            tf.summary.scalar(phase, averaged_error_rate, collections=[key])

    # Histograms of the RNN state: element 0 of each layer tuple is reported
    # as the cell state, element 1 as the hidden state.
    histogram_specs = (
        ('Training_layer-{0}_cell_state', 0, train_key),
        ('Test_layer-{0}_cell_state', 0, test_key),
        ('Training_layer-{0}_hidden_state', 1, train_key),
        ('Test_layer-{0}_hidden_state', 1, test_key),
    )
    with tf.name_scope('RNN_internal_state'):
        for idx, state_variable in enumerate(self.rnn_tuple_state):
            for template, part, key in histogram_specs:
                tf.summary.histogram(template.format(idx), state_variable[part],
                                     collections=[key])

    self.train_summaries_op = tf.summary.merge_all(key=train_key)
    self.test_summaries_op = tf.summary.merge_all(key=test_key)

    if self.is_ditributed:
        return
    self.summary_writer_op = tf.summary.FileWriter(tensorboard_dir, graph=session.graph)
def _init(self, sess):
    """Load the values of self.prms into the matching graph variables.

    Graph variables and dict entries are matched on the save-name derived
    from each variable name. Names present on one side only are reported
    through the logger (graph-only names are skipped when
    is_training_name() accepts them); only the common names are updated.
    """
    graph_vars = tf.get_collection(tf.GraphKeys().VARIABLES)  # TODO
    names_in_graph = {get_savename_from_varname(var.name) for var in graph_vars}
    names_in_dict = set(six.iterkeys(self.prms))
    shared = names_in_graph & names_in_dict

    logger.info("Params to restore: {}".format(', '.join(map(str, shared))))

    for graph_only in names_in_graph - names_in_dict:
        if is_training_name(graph_only):
            continue
        logger.warn(
            "Variable {} in the graph not found in the dict!".format(graph_only))

    for dict_only in names_in_dict - names_in_graph:
        logger.warn(
            "Variable {} in the dict not found in the graph!".format(dict_only))

    restorable = [
        var for var in graph_vars
        if get_savename_from_varname(var.name) in shared
    ]
    upd = SessionUpdate(sess, restorable)

    logger.info("Restoring from dict ...")
    new_values = {}
    for name, value in six.iteritems(self.prms):
        if name in shared:
            new_values[name] = value
    upd.update(new_values)
def __init__(self, filename):
    """Import the meta graph stored in `filename` into the default graph.

    After the import, the default graph must expose the input-variables
    collection, the trainable-variables collection and the variables
    collection; an AssertionError is raised for the first one missing.
    """
    tf.train.import_meta_graph(filename)
    available_keys = tf.get_default_graph().get_all_collection_keys()

    required_keys = (
        INPUT_VARS_KEY,
        tf.GraphKeys.TRAINABLE_VARIABLES,
        # VARIABLES is read from an instance, as in the rest of this file.
        tf.GraphKeys().VARIABLES,
    )
    for key in required_keys:
        assert key in available_keys, \
            "Collection {} not found in metagraph!".format(key)
def main(self, args=None):
    """Build a 2-class linear classifier on 8192-d features and restore it.

    Steps performed:
      1. parse `args` with self.parsearg() and call self.loadModelParams()
      2. build the graph: placeholders, one dense layer, sparse softmax
         cross-entropy loss and an Adam training op
      3. create the saver, the session and the TensorBoard writer
      4. initialise all variables, then restore the checkpoint located at
         self.Model_dir + self.Model_name + '-.ckpt'

    :param args: forwarded unchanged to self.parsearg()
    :return: None. The function returns right after the restore, see the
             NOTE(review) below.
    """
    self.args = self.parsearg(args)
    self.loadModelParams()

    x = tf.placeholder(tf.float32, [None, 4096 * 2])
    y = tf.placeholder(tf.int32, [None])
    w = tf.Variable(tf.truncated_normal([4096 * 2, 2], stddev=np.sqrt(0.5)))
    b = tf.Variable(tf.zeros([2]))

    # sparse_softmax_cross_entropy_with_logits applies the softmax itself,
    # so it must receive the raw scores. Feeding it softmax probabilities
    # (as was done before) applies softmax twice and flattens the gradients.
    logits = tf.matmul(x, w) + b
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
    optimizer = tf.train.AdamOptimizer(
        self.args.learning_rate).minimize(cost)

    # Saving the variables (created after the optimizer so that its slot
    # variables are part of the list as well)
    self.saver = tf.train.Saver(tf.global_variables())
    self.sess = tf.Session()

    # Tensorboard
    with tf.name_scope("Loss"):
        tf.summary.scalar('Training', cost)
    train_sum_op = tf.summary.merge_all()
    run_name = datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
    self.writer = tf.summary.FileWriter(self.tensorboard_dir + run_name + '/',
                                        graph=self.sess.graph)

    self.initializer = tf.global_variables_initializer()
    self.sess.run(self.initializer)

    # Checkpoint path shared by restore and save
    model = self.Model_dir + self.Model_name + '-' + '.ckpt'

    # The session lives on the instance; the bare name `sess` used here
    # previously was undefined and raised NameError.
    self.saver.restore(self.sess, model)
    print('model restored')
    # NOTE(review): this unconditional return makes the training loop below
    # unreachable. It is kept because the intent (restore-only vs. resume
    # training) cannot be determined from this function - confirm and then
    # remove either the return or the loop.
    return

    # training
    for epoch in range(self.args.num_epoch):
        data_gen = self.data_iter()
        for features, labels in tqdm(data_gen, desc='Training'):
            _, c, summary = self.sess.run([optimizer, cost, train_sum_op],
                                          feed_dict={
                                              x: features,
                                              y: labels
                                          })
            self.writer.add_summary(summary, self.global_step)
            self.global_step += 1
            if self.global_step % self.args.save_every == 0:
                self.saver.save(self.sess, model)
                tqdm.write("----- Step %d -- Loss %.2f " %
                           (self.global_step, c))
def __init__(self, lr, n_actions, name, fcl_dims=256,
             input_dims=(210, 160, 4), chkpt_dir='tmp/dqn'):
    """Store the settings, build the network and prepare checkpointing.

    :param lr: stored as self.lr
    :param n_actions: stored as self.n_actions
    :param name: stored as self.name, also the scope used to collect
                 the trainable variables into self.params
    :param fcl_dims: stored as self.fcl_dims
    :param input_dims: stored as self.input_dims
    :param chkpt_dir: directory in which 'deepqnet.ckpt' is located
    """
    # Plain settings
    self.lr = lr
    self.name = name
    self.n_actions = n_actions
    self.fcl_dims = fcl_dims
    self.input_dims = input_dims

    # Session first, then the graph, then the variable initialisation
    self.sess = tf.Session()
    self.build_network()
    init_op = tf.global_variables_initializer()
    self.sess.run(init_op)

    # Checkpoint handling
    self.saver = tf.train.Saver()
    self.checkpoint_file = os.path.join(chkpt_dir, 'deepqnet.ckpt')

    # Trainable variables that live under this network's scope
    self.params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope=self.name)
def __init__(self, session, num_labels, num_layers, hidden_size, dropout, batch_size, learning_rate,
             lr_decay_factor, grad_clip, max_input_seq_length, max_target_seq_length, input_dim,
             forward_only=False, tensorboard_dir=None, tb_run_name=None):
    """
    Acoustic rnn model, using ctc loss with lstm cells
    Inputs:
    session - tensorflow session (its graph is given to the summary writer)
    num_labels - dimension of character input/one hot encoding
    num_layers - number of lstm layers
    hidden_size - size of hidden layers
    dropout - value used as both the input and the output keep probability
              of the DropoutWrapper added when training
    batch_size - number of training examples fed at once
    learning_rate - learning rate parameter fed to optimizer
    lr_decay_factor - decay factor of the learning rate
    grad_clip - max gradient size (prevent exploding gradients)
    max_input_seq_length - maximum length of input vector sequence
    max_target_seq_length - maximum length of ouput vector sequence
    input_dim - dimension of input vector
    forward_only - whether to build back prop nodes or not
    tensorboard_dir - path to tensorboard file (None if not activated)
    tb_run_name - sub-directory of tensorboard_dir receiving the summaries,
                  a timestamp is used when None
    """
    # Two distinct GraphKeys instances are used as private collection keys
    # so that training and test summaries can be merged separately
    train_key = tf.GraphKeys()
    test_key = tf.GraphKeys()

    self.dropout = dropout
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name='learning_rate')
    tf.scalar_summary('Learning rate', self.learning_rate, collections=[train_key, test_key])
    decayed_learning_rate = self.learning_rate * lr_decay_factor
    self.learning_rate_decay_op = self.learning_rate.assign(decayed_learning_rate)
    self.global_step = tf.Variable(0, trainable=False, name='global_step')
    self.dropout_keep_prob_lstm_input = tf.constant(self.dropout)
    self.dropout_keep_prob_lstm_output = tf.constant(self.dropout)
    self.max_input_seq_length = max_input_seq_length
    self.max_target_seq_length = max_target_seq_length
    self.tensorboard_dir = tensorboard_dir

    # Data pipes and audio_processor start unset
    self.train_conn = None
    self.test_conn = None
    self.audio_processor = None

    # Graph inputs
    self.inputs = tf.placeholder(tf.float32,
                                 shape=[self.max_input_seq_length, None, input_dim],
                                 name="inputs")
    # An int16 would use less memory but CTC needs an int32
    self.input_seq_lengths = tf.placeholder(tf.int32, shape=[None], name="input_seq_lengths")
    # int16 for less memory consumption,
    # max_target_seq_length should be less than 65535 (which is huge)
    self.target_seq_lengths = tf.placeholder(tf.int16, shape=[None], name="target_seq_lengths")

    # Cells of the acoustic model
    lstm_cell = rnn_cell.BasicLSTMCell(hidden_size, state_is_tuple=True)
    if not forward_only:
        # Training : wrap the cell with dropout
        lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
                                            input_keep_prob=self.dropout_keep_prob_lstm_input,
                                            output_keep_prob=self.dropout_keep_prob_lstm_output)
    if num_layers > 1:
        lstm_cell = rnn_cell.MultiRNNCell([lstm_cell] * num_layers, state_is_tuple=True)

    # Input layer
    with tf.name_scope('Input_Layer'):
        input_weights = tf.Variable(
            tf.truncated_normal([input_dim, hidden_size],
                                stddev=np.sqrt(2.0 / (2 * hidden_size))),
            name="input_w")
        input_bias = tf.Variable(tf.zeros([hidden_size]), name="input_b")

    # Project every time step of the input through the input layer
    rnn_inputs = []
    for time_slice in tf.split(0, self.max_input_seq_length, self.inputs):
        frame = tf.squeeze(time_slice, squeeze_dims=[0])
        rnn_inputs.append(tf.matmul(frame, input_weights) + input_bias)

    # The rnn starts from an all-zero state
    zero_state = lstm_cell.zero_state(self.batch_size, tf.float32)

    # Rnn
    with tf.name_scope('Dynamic_rnn'):
        stacked_inputs = tf.pack(rnn_inputs)
        rnn_output, self.hidden_state = rnn.dynamic_rnn(lstm_cell, stacked_inputs,
                                                        sequence_length=self.input_seq_lengths,
                                                        initial_state=zero_state,
                                                        time_major=True,
                                                        parallel_iterations=1000)

    # Output layer
    with tf.name_scope('Output_layer'):
        output_weights = tf.Variable(
            tf.truncated_normal([hidden_size, num_labels],
                                stddev=np.sqrt(2.0 / (2 * num_labels))),
            name="output_w")
        output_bias = tf.Variable(tf.zeros([num_labels]), name="output_b")

    # Logits, one slice per time step
    step_logits = []
    for time_slice in tf.split(0, self.max_input_seq_length, rnn_output):
        frame = tf.squeeze(time_slice, squeeze_dims=[0])
        step_logits.append(tf.matmul(frame, output_weights) + output_bias)
    self.logits = tf.pack(step_logits)

    # Prediction : first decoded path of the beam search
    decoder_result = ctc.ctc_beam_search_decoder(self.logits, self.input_seq_lengths)
    self.prediction = tf.to_int32(decoder_result[0][0])

    if not forward_only:
        # Sparse tensor inputs of the graph
        # An int16 would use less memory but SparseTensor needs an int64
        self.target_indices = tf.placeholder(tf.int64, shape=[None, 2], name="target_indices")
        # An int8 would use less memory but CTC needs an int32
        self.target_vals = tf.placeholder(tf.int32, shape=[None], name="target_vals")

        # Sparse labels fed to the ctc loss
        sparse_labels = tf.SparseTensor(
            indices=self.target_indices,
            values=self.target_vals,
            shape=[self.batch_size, self.max_target_seq_length])

        # Ctc loss
        self.ctc_loss = ctc.ctc_loss(self.logits, sparse_labels, self.input_seq_lengths)
        self.mean_loss = tf.reduce_mean(self.ctc_loss)
        tf.scalar_summary('Mean loss (Training)', self.mean_loss, collections=[train_key])
        tf.scalar_summary('Mean loss (Test)', self.mean_loss, collections=[test_key])

        # Adam update on gradients clipped by their global norm
        trainable_params = tf.trainable_variables()
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        raw_gradients = tf.gradients(self.ctc_loss, trainable_params)
        clipped_gradients, _ = tf.clip_by_global_norm(raw_gradients, grad_clip)
        self.update = optimizer.apply_gradients(zip(clipped_gradients, trainable_params),
                                                global_step=self.global_step)

        # Accuracy : summed edit distance divided by the number of label values
        with tf.name_scope('Accuracy'):
            total_edit_distance = tf.reduce_sum(
                tf.edit_distance(self.prediction, sparse_labels, normalize=False))
            label_count = tf.to_float(tf.size(sparse_labels.values))
            error_rate = total_edit_distance / label_count
            tf.scalar_summary('Error Rate (Training)', error_rate, collections=[train_key])
            tf.scalar_summary('Error Rate (Test)', error_rate, collections=[test_key])

    # TensorBoard init
    if self.tensorboard_dir is None:
        self.summary_writer = None
    else:
        self.train_summaries = tf.merge_all_summaries(key=train_key)
        self.test_summaries = tf.merge_all_summaries(key=test_key)
        run_name = datetime.now().strftime('%Y-%m-%d--%H-%M-%S') if tb_run_name is None else tb_run_name
        self.summary_writer = tf.train.SummaryWriter(tensorboard_dir + '/' + run_name + '/',
                                                     graph=session.graph)

    # Save every variable except the hidden_state :
    # it is kept across batches but is not needed across different runs,
    # especially when processing a one time file
    variables_to_save = [var for var in tf.all_variables() if 'hidden_state' not in var.name]
    self.saver = tf.train.Saver(variables_to_save)
def get_network_params(self):
    """Return the trainable variables under scope self._name, leaving out
    every variable whose name contains "Std"."""
    scoped_trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope=self._name)
    kept = []
    for candidate in scoped_trainables:
        if "Std" in candidate.name:
            continue
        kept.append(candidate)
    return kept
def get_network_params(self):
    """Return the trainable variables collected under scope self._name."""
    collection_key = tf.GraphKeys.TRAINABLE_VARIABLES
    scoped_trainables = tf.get_collection(collection_key, scope=self._name)
    return scoped_trainables
def run_FUCOS(**kwargs):
    """Build the FUCOS network, then either train it or run it on images.

    Keyword arguments read by this function:
      training_data   - pair indexed as [0] (inputs) and [1] (ground truth);
                        [0].shape[0] is used as the number of samples
      validation_data - same layout as training_data
      batchsize       - batch size given to next_batch()
      TRAIN           - train when truthy (default True), otherwise test
      run             - checkpoint location: passed to saver.save() when
                        training and to tf.train.latest_checkpoint() when
                        testing
      testing_path    - image file, or directory of '.jpg' files (test only)
      isfolder        - whether testing_path is a directory (test only)

    Training stops after 200 epochs, or earlier once the validation loss has
    not improved for more than 30 consecutive epochs. The model is saved every
    time the validation loss improves. Afterwards the network is trained for
    `best_epoch` more epochs on training and validation data combined.

    Testing restores the latest checkpoint, predicts a map for every 3-channel
    image and displays image and post-processed map with matplotlib.
    """
    training_data = kwargs.get('training_data')
    validation_data = kwargs.get('validation_data')
    batchsize = kwargs.get('batchsize')
    TRAIN = kwargs.get('TRAIN', True)
    run = kwargs.get('run')

    config_sess = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    config_sess.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=config_sess)

    # build the model
    model = []
    with tf.device('/gpu:2'):
        x = tf.placeholder(tf.float32, (None, 135, 240, 3), 'input')
        y_ = tf.placeholder(tf.float32, (None, 135, 240, 1), 'gt')
        keep_prob = tf.placeholder(tf.float32, name='dropout_prob')

        with tf.variable_scope('conv1'):
            conv1 = layers.ConvolutionalLayer(x, [135, 240, 3], [3, 3, 3, 64])
            model.append(conv1)
        with tf.variable_scope('conv2'):
            conv2 = layers.ConvolutionalLayer(conv1.output(),
                                              conv1.get_output_shape(),
                                              [3, 3, 64, 64],
                                              pool=True)
            model.append(conv2)
        with tf.variable_scope('conv3'):
            conv3 = layers.ConvolutionalLayer(conv2.output(),
                                              conv2.get_output_shape(),
                                              [3, 3, 64, 128])
            model.append(conv3)
        with tf.variable_scope('conv4'):
            conv4 = layers.ConvolutionalLayer(conv3.output(),
                                              conv3.get_output_shape(),
                                              [3, 3, 128, 128],
                                              pool=True)
            model.append(conv4)
        with tf.variable_scope('conv5'):
            conv5 = layers.ConvolutionalLayer(conv4.output(),
                                              conv4.get_output_shape(),
                                              [3, 3, 128, 256])
            model.append(conv5)
        with tf.variable_scope('conv6'):
            conv6 = layers.ConvolutionalLayer(conv5.output(),
                                              conv5.get_output_shape(),
                                              [3, 3, 256, 256])
            model.append(conv6)
        with tf.variable_scope('conv7'):
            conv7 = layers.ConvolutionalLayer(conv6.output(),
                                              conv6.get_output_shape(),
                                              [3, 3, 256, 256],
                                              pool=True)
            model.append(conv7)
        with tf.variable_scope('conv8'):
            conv8 = layers.ConvolutionalLayer(conv7.output(),
                                              conv7.get_output_shape(),
                                              [3, 3, 256, 512])
            model.append(conv8)
        with tf.variable_scope('conv9'):
            conv9 = layers.ConvolutionalLayer(conv8.output(),
                                              conv8.get_output_shape(),
                                              [3, 3, 512, 512])
            model.append(conv9)
        with tf.variable_scope('conv10'):
            conv10 = layers.ConvolutionalLayer(conv9.output(),
                                               conv9.get_output_shape(),
                                               [3, 3, 512, 512],
                                               pool=True)
            model.append(conv10)
        with tf.variable_scope('conv11'):
            conv11 = layers.ConvolutionalLayer(conv10.output(),
                                               conv10.get_output_shape(),
                                               [3, 3, 512, 512])
            model.append(conv11)
        with tf.variable_scope('conv12'):
            conv12 = layers.ConvolutionalLayer(conv11.output(),
                                               conv11.get_output_shape(),
                                               [3, 3, 512, 512])
            model.append(conv12)
        with tf.variable_scope('conv13'):
            conv13 = layers.ConvolutionalLayer(conv12.output(),
                                               conv12.get_output_shape(),
                                               [3, 3, 512, 512],
                                               pool=True)
            model.append(conv13)
        with tf.variable_scope('conv14'):
            conv14 = layers.ConvolutionalLayer(conv13.output(),
                                               conv13.get_output_shape(),
                                               [7, 7, 512, 4096],
                                               drop_out=True,
                                               drop_out_prob=keep_prob)
            model.append(conv14)
        with tf.variable_scope('conv15'):
            conv15 = layers.ConvolutionalLayer(conv14.output(),
                                               conv14.get_output_shape(),
                                               [1, 1, 4096, 4096],
                                               drop_out=True,
                                               drop_out_prob=keep_prob)
            model.append(conv15)

        # Decoder: each transposed convolution is added to a 1x1 projection
        # of an earlier encoder stage (conv10, then conv7).
        with tf.variable_scope('convtrans1'):
            deconv1 = layers.ConvolutionalTransposeLayer(
                conv15.output(), [4, 4, 60, 4096], None)
            model.append(deconv1)
        with tf.variable_scope('conv16'):
            conv16 = layers.ConvolutionalLayer(conv10.output(),
                                               conv10.get_output_shape(),
                                               [1, 1, 512, 60])
            model.append(conv16)
        conv16_output = conv16.output()
        sum1 = conv16_output + tf.image.resize_images(
            deconv1.output(),
            (tf.shape(conv16_output)[1], tf.shape(conv16_output)[2]))

        with tf.variable_scope('convtrans2'):
            deconv2 = layers.ConvolutionalTransposeLayer(
                sum1, [4, 4, 60, 60], None)
            model.append(deconv2)
        with tf.variable_scope('conv17'):
            conv17 = layers.ConvolutionalLayer(conv7.output(),
                                               conv7.get_output_shape(),
                                               [1, 1, 256, 60])
            model.append(conv17)
        conv17_output = conv17.output()
        sum2 = conv17_output + tf.image.resize_images(
            deconv2.output(),
            (tf.shape(conv17_output)[1], tf.shape(conv17_output)[2]))

        with tf.variable_scope('convtrans3'):
            deconv3 = layers.ConvolutionalTransposeLayer(sum2,
                                                         [16, 16, 60, 60],
                                                         None,
                                                         deconv_stride=(1, 8, 8, 1))
            model.append(deconv3)
        with tf.variable_scope('conv18'):
            conv18 = layers.ConvolutionalLayer(deconv3.output(),
                                               deconv3.get_output_shape(),
                                               [1, 1, 60, 12])
            model.append(conv18)
        with tf.variable_scope('conv19'):
            conv19 = layers.ConvolutionalLayer(
                conv18.output(),
                conv18.get_output_shape_tensor(), [1, 1, 12, 1],
                activation=function['linear'])
            model.append(conv19)

        # resize to match the ground truth's shape
        y_pre_activation = tf.image.resize_images(conv19.output(), (135, 240))
        # activate the output by sigmoid
        y_pred = function['sigmoid'](y_pre_activation)

        # use binary cross entropy
        cost = metrics.MultinoulliCrossEntropy(y_pre_activation, y_)
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        # L2 regularization over every variable whose name contains 'weights'
        L2 = sum([
            tf.reduce_mean(tf.square(theta))
            for theta in (weight for weight in var_list
                          if 'weights' in weight.name)
        ])
        cost += 1e-4 * L2

        # ADAM optimization
        opt = tf.train.AdamOptimizer(1e-3, 0.9, 0.99, 1e-8).minimize(
            cost, var_list=var_list)

        # fraction of pixels whose thresholded prediction equals the ground truth
        accuracy = tf.reduce_mean(
            tf.cast(
                tf.equal(tf.cast(y_pred >= 0.5, tf.uint8),
                         tf.cast(y_, tf.uint8)), tf.float32))

    saver = tf.train.Saver()

    if TRAIN:
        # runs in the default session installed by tf.InteractiveSession
        tf.Operation.run(tf.global_variables_initializer())
        print('Loading VGG16 weights...')
        # load pretrained VGG16 weights
        load_weights('pretrained/vgg16_weights.npz', model, sess)

        best_valid_accuracy = 0.
        best_valid_loss = np.inf
        best_epoch = 0
        epoch = 0
        vote_to_terminate = 0
        done_looping = False
        print('TRAINING...')
        start_training_time = time.time()
        while epoch < 200 and not done_looping:
            epoch += 1
            num_iter_training = int(training_data[0].shape[0] / batchsize)
            losses_train = 0.
            accuracies_train = 0.
            start_batch_time = time.time()
            print('Epoch %d...' % epoch)

            # training
            batch = next_batch(training_data, batchsize)
            for b in batch:
                fd = {x: b[0], y_: b[1], keep_prob: 0.1}
                _, a, l = sess.run([opt, accuracy, cost], feed_dict=fd)
                assert not np.isnan(l), 'Train failed with loss being NaN'
                losses_train += l
                accuracies_train += a

            print('\ttraining loss: %s' % (losses_train / num_iter_training))
            print('\ttraining accuracy: %s' %
                  (accuracies_train / num_iter_training))
            print('\tepoch %d took %.2f hours' %
                  (epoch, (time.time() - start_batch_time) / 3600.))

            num_iter_valid = int(validation_data[0].shape[0] / batchsize)
            losses_valid = 0.
            accuracies_valid = 0.
            start_valid_time = time.time()

            # validation
            batch = next_batch(validation_data, batchsize)
            for b in batch:
                fd = {x: b[0], y_: b[1], keep_prob: 1}
                l, a = sess.run([cost, accuracy], feed_dict=fd)
                losses_valid += l
                accuracies_valid += a

            avr_acc_valid = accuracies_valid / num_iter_valid
            losses_valid /= num_iter_valid

            print('\tvalidation took %.2f hours' %
                  ((time.time() - start_valid_time) / 3600.))
            print('\tvalidation loss: %s' % losses_valid)
            print('\tvalidation accuracy: %s' % avr_acc_valid)

            # Track the best accuracy seen so far. It used to stay at its
            # initial 0, so the final report always printed 0.00.
            best_valid_accuracy = max(best_valid_accuracy, avr_acc_valid)

            if losses_valid < best_valid_loss:
                best_valid_loss = losses_valid
                best_epoch = epoch
                vote_to_terminate = 0
                print('\tbest validation loss achieved: %.4f' %
                      best_valid_loss)
                save_path = saver.save(sess, run)
                print("\tmodel saved in file: %s" % save_path)
            else:
                vote_to_terminate += 1

            if vote_to_terminate > 30:
                done_looping = True

        print('Training ends after %.2f hours' %
              ((time.time() - start_training_time) / 3600.))
        print('\tbest validation accuracy: %.2f' % best_valid_accuracy)

        # NOTE(review): this phase continues from the current weights and its
        # result is never saved in this function - confirm that is intended.
        print('Training the model using all data available...')
        total_training_data = (np.concatenate(
            (training_data[0], validation_data[0])),
                               np.concatenate(
                                   (training_data[1], validation_data[1])))
        for i in range(best_epoch):
            num_iter_training = int(total_training_data[0].shape[0] /
                                    batchsize)
            losses_train = 0.
            start_batch_time = time.time()
            print('Epoch %d...' % (i + 1))

            # training
            batch = next_batch(total_training_data, batchsize)
            for b in batch:
                fd = {x: b[0], y_: b[1], keep_prob: 0.1}
                _, _, l = sess.run([opt, accuracy, cost], feed_dict=fd)
                assert not np.isnan(l), 'Train failed with loss being NaN'
                losses_train += l

            print('\ttraining loss: %s' % (losses_train / num_iter_training))
            print('\tepoch %d took %.2f hours' %
                  (i + 1, (time.time() - start_batch_time) / 3600.))

    else:
        # testing
        path = kwargs.get('testing_path')
        isfolder = kwargs.get('isfolder')

        image_list = [
            path + '/' + f for f in os.listdir(path) if f.endswith('.jpg')
        ] if isfolder else [path]

        # NOTE(review): `run` is used as a save path when training but as a
        # checkpoint directory here - verify callers pass a matching value.
        saver.restore(sess, tf.train.latest_checkpoint(run))
        print('Checkpoint restored...')
        print('Testing %d images...' % len(image_list))
        images = []
        predictions = []
        time.sleep(0.1)
        for image_path in tqdm.tqdm(image_list, unit='images'):
            ori_img = misc.imread(image_path)
            # images without a channel axis are skipped
            if len(ori_img.shape) < 3:
                continue
            img = padding(ori_img, 135, 240)
            img = np.reshape(img, (1, 135, 240, 3)) / 255.
            fd = {x: img, keep_prob: 1}
            pred = sess.run(y_pred, feed_dict=fd)
            images.append(ori_img)
            predictions.append(pred)
        time.sleep(0.1)
        print('Testing finished!')

        for image, prediction in zip(images, predictions):
            plt.figure(1)
            sal = np.reshape(prediction, (135, 240))
            sal = depadding(sal, image.shape[0], image.shape[1])
            # keep only the values above the 95th percentile, then smooth
            # and rescale to [0, 1]
            sal = sal * (sal > np.percentile(sal, 95))
            sal = gaussian_filter(sal, sigma=0.09 * sal.shape[0])
            sal = (sal - np.min(sal)) / (np.max(sal) - np.min(sal))
            plt.subplot(211)
            plt.imshow(image)
            plt.subplot(212)
            plt.imshow(sal, cmap='gray')
            plt.show()
def __init__(self, session, num_labels, num_layers, hidden_size, input_keep_prob, output_keep_prob, batch_size,
             learning_rate, lr_decay_factor, grad_clip, max_input_seq_length, max_target_seq_length, input_dim,
             normalization, forward_only=False, tensorboard_dir=None, tb_run_name=None, timeline_enabled=False):
    """
    Acoustic rnn model, using ctc loss with lstm cells
    Inputs:
    session - tensorflow session (its graph is given to the summary writer)
    num_labels - dimension of character input/one hot encoding
    num_layers - number of lstm layers
    hidden_size - size of hidden layers
    input_keep_prob - probability of keeping input signal for a cell during training
                      (stored on the instance, the graph reads a placeholder)
    output_keep_prob - probability of keeping output signal from a cell during training
                       (stored on the instance, the graph reads a placeholder)
    batch_size - number of training examples fed at once
    learning_rate - learning rate parameter fed to optimizer
    lr_decay_factor - decay factor of the learning rate
    grad_clip - max gradient size (prevent exploding gradients)
    max_input_seq_length - maximum length of input vector sequence
    max_target_seq_length - maximum length of ouput vector sequence
    input_dim - dimension of input vector
    normalization - boolean indicating whether or not to normalize data in a input batch
                    (only applied when forward_only is False)
    forward_only - whether to build back prop nodes or not
    tensorboard_dir - path to tensorboard file (None if not activated)
    tb_run_name - directory name for the tensorboard files inside tensorboard_dir,
                  a timestamp is used when None
    timeline_enabled - enable the output of a trace file for timeline visualization
                       (only stored on the instance here)
    """
    # Thread management
    self.lock = threading.Lock()

    # Two distinct GraphKeys instances are used as private collection keys
    # so that training and test summaries can be merged separately
    train_key = tf.GraphKeys()
    test_key = tf.GraphKeys()
    per_phase = (('Training', train_key), ('Test', test_key))

    # Model variables
    self.input_keep_prob = input_keep_prob
    self.output_keep_prob = output_keep_prob
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name='learning_rate')
    tf.summary.scalar('Learning_rate', self.learning_rate, collections=[train_key, test_key])
    decayed_learning_rate = self.learning_rate * lr_decay_factor
    self.learning_rate_decay_op = self.learning_rate.assign(decayed_learning_rate)
    self.global_step = tf.Variable(0, trainable=False, name='global_step')
    self.max_input_seq_length = max_input_seq_length
    self.max_target_seq_length = max_target_seq_length
    self.tensorboard_dir = tensorboard_dir
    self.timeline_enabled = timeline_enabled
    self.input_dim = input_dim
    self.epsilon = 1e-3

    # Graph inputs
    self.inputs = tf.placeholder(tf.float32,
                                 shape=[self.max_input_seq_length, None, self.input_dim],
                                 name="inputs")
    # An int16 would use less memory but CTC needs an int32
    self.input_seq_lengths = tf.placeholder(tf.int32, shape=[None], name="input_seq_lengths")

    # Cells of the acoustic model
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, state_is_tuple=True)

    # Dropout layer (used only when training)
    with tf.name_scope('dropout'):
        # Placeholders, so the values can be overridden when running on the test set
        self.input_keep_prob_ph = tf.placeholder(tf.float32)
        self.output_keep_prob_ph = tf.placeholder(tf.float32)

        if not forward_only:
            # Training : report the keep probabilities and wrap the cell with dropout
            tf.summary.scalar('input_keep_prob', self.input_keep_prob_ph, collections=[train_key])
            tf.summary.scalar('output_keep_prob', self.output_keep_prob_ph, collections=[train_key])
            lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell,
                                                      input_keep_prob=self.input_keep_prob_ph,
                                                      output_keep_prob=self.output_keep_prob_ph)

    if num_layers > 1:
        lstm_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers, state_is_tuple=True)

    # Input layer
    with tf.name_scope('Input_Layer'):
        input_weights = tf.Variable(
            tf.truncated_normal([input_dim, hidden_size],
                                stddev=np.sqrt(2.0 / (2 * hidden_size))),
            name="input_w")
        input_bias = tf.Variable(tf.zeros([hidden_size]), name="input_b")

    # Project every time step of the input through the input layer
    projected_steps = []
    for time_slice in tf.split(0, self.max_input_seq_length, self.inputs):
        frame = tf.squeeze(time_slice, squeeze_dims=[0])
        projected_steps.append(tf.matmul(frame, input_weights) + input_bias)
    # Switch from a list to a tensor
    inputs = tf.pack(projected_steps)

    # Training : add a batch normalization layer to the model
    if normalization and not forward_only:
        # Note : the tensor is [time, batch_size, input vector] so we go against dim 1
        batch_mean, batch_var = tf.nn.moments(inputs, [1], shift=None, name="moments", keep_dims=True)
        inputs = tf.nn.batch_normalization(inputs, batch_mean, batch_var, None, None,
                                           self.epsilon, name="batch_norm")

    # The rnn starts from an all-zero state
    zero_state = lstm_cell.zero_state(self.batch_size, tf.float32)

    # Rnn
    with tf.name_scope('Dynamic_rnn'):
        rnn_output, self.hidden_state = tf.nn.dynamic_rnn(lstm_cell, inputs,
                                                          sequence_length=self.input_seq_lengths,
                                                          initial_state=zero_state,
                                                          time_major=True)

    # Output layer
    with tf.name_scope('Output_layer'):
        output_weights = tf.Variable(
            tf.truncated_normal([hidden_size, num_labels],
                                stddev=np.sqrt(2.0 / (2 * num_labels))),
            name="output_w")
        output_bias = tf.Variable(tf.zeros([num_labels]), name="output_b")

    # Logits, one slice per time step
    step_logits = []
    for time_slice in tf.split(0, self.max_input_seq_length, rnn_output):
        frame = tf.squeeze(time_slice, squeeze_dims=[0])
        step_logits.append(tf.matmul(frame, output_weights) + output_bias)
    self.logits = tf.pack(step_logits)

    # Prediction : first decoded path of the beam search
    decoded, _log_prob = tf.nn.ctc_beam_search_decoder(self.logits, self.input_seq_lengths)
    self.prediction = tf.to_int32(decoded[0])

    if not forward_only:
        # Sparse tensor for the correct labels input
        self.sparse_labels = tf.sparse_placeholder(tf.int32)

        # Ctc loss
        self.ctc_loss = tf.nn.ctc_loss(self.logits, self.sparse_labels, self.input_seq_lengths)

        # Mean loss : only used to check on progression in learning
        # Each loss is divided by its input sequence length before averaging across the batch
        loss_per_frame = tf.truediv(self.ctc_loss, tf.to_float(self.input_seq_lengths))
        self.mean_loss = tf.reduce_mean(loss_per_frame)
        with tf.name_scope('Mean_loss'):
            for phase, key in per_phase:
                tf.summary.scalar(phase, self.mean_loss, collections=[key])

        # Adam update on gradients clipped by their global norm
        trainable_params = tf.trainable_variables()
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        raw_gradients = tf.gradients(self.ctc_loss, trainable_params)
        clipped_gradients, _ = tf.clip_by_global_norm(raw_gradients, grad_clip)
        self.update = optimizer.apply_gradients(zip(clipped_gradients, trainable_params),
                                                global_step=self.global_step)

        # Accuracy : mean normalized edit distance
        with tf.name_scope('Accuracy_-_Error_Rate'):
            error_rate = tf.reduce_mean(
                tf.edit_distance(self.prediction, self.sparse_labels, normalize=True))
            for phase, key in per_phase:
                tf.summary.scalar(phase, error_rate, collections=[key])

    # TensorBoard init
    if self.tensorboard_dir is None:
        self.summary_writer = None
    else:
        self.train_summaries = tf.summary.merge_all(key=train_key)
        self.test_summaries = tf.summary.merge_all(key=test_key)
        run_name = datetime.now().strftime('%Y-%m-%d--%H-%M-%S') if tb_run_name is None else tb_run_name
        self.summary_writer = tf.summary.FileWriter(tensorboard_dir + '/' + run_name + '/',
                                                    graph=session.graph)

    # Save every variable except the hidden_state :
    # it is kept across batches but is not needed across different runs,
    # especially when processing a one time file
    variables_to_save = [var for var in tf.global_variables() if 'hidden_state' not in var.name]
    self.saver = tf.train.Saver(variables_to_save)
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import tensorflow as tf
from collections import OrderedDict
from .utils import get_from_module
from .utils import process_params

# Collection key for losses, read from a tf.GraphKeys instance.
LOSSES = tf.GraphKeys().LOSSES

# Names exported by a star-import of this module.
__all__ = ['add_loss', 'get_losses', 'get_regularization_losses', 'get_total_loss',
           'l1_loss', 'l2_loss', 'get', 'process_parameters']


def add_loss(loss):
    """Adds an externally defined loss to collection of losses.

    Parameters
    ----------
    loss: A loss `Tensor`.
    """
    # NOTE(review): no statement follows the docstring in this view, so the
    # function returns None without touching `loss` - confirm the body was
    # not truncated.
def _get_vars(self, scope):
    """Return the trainable variables collected under `scope`."""
    collection_key = tf.GraphKeys.TRAINABLE_VARIABLES
    scoped_trainables = tf.get_collection(collection_key, scope=scope)
    return scoped_trainables