def init_training_dnn_single(self, graph, optimizer_conf):
    """Build the single-device DNN training graph.

    Assumes ``self.logits`` and ``self.labels_holder`` are already in
    place on ``graph``.

    Sets: ``self.loss``, ``self.learning_rate_holder``,
    ``self.train_op``, ``self.eval_acc``, ``self.init_train_op``.
    """
    with graph.as_default():
        # Snapshot the variable set so that, after the optimizer is
        # built, we can initialize only the variables it introduced.
        variables_before = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        loss = nnet.loss_dnn(self.logits, self.labels_holder)
        learning_rate_holder = tf.placeholder(
            tf.float32, shape=[], name='learning_rate')
        # Building the train op may introduce new (e.g. slot) variables.
        opt = nnet.prep_optimizer(optimizer_conf, learning_rate_holder)
        grads = nnet.get_gradients(opt, loss)
        train_op = nnet.apply_gradients(optimizer_conf, opt, grads)
        eval_acc = nnet.evaluation_dnn(self.logits, self.labels_holder)
        # Initialize only the newly created variables, leaving the
        # (already-initialized) model variables untouched.
        variables_after = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        new_variables = list(set(variables_after) - set(variables_before))
        init_train_op = tf.variables_initializer(new_variables)
        self.loss = loss
        self.learning_rate_holder = learning_rate_holder
        self.train_op = train_op
        self.eval_acc = eval_acc
        self.init_train_op = init_train_op
def init_training_dnn_multi(self, graph, optimizer_conf):
    """Build the multi-GPU (tower-parallel) DNN training graph.

    One tower per GPU: each tower computes the loss on its own slice of
    the batch, gradients are averaged on the CPU and applied once.

    Assumes ``self.tower_logits``, ``self.labels_holder``,
    ``self.num_towers`` and ``self.batch_size`` are in place.

    Sets: ``self.loss``, ``self.learning_rate_holder``,
    ``self.train_op``, ``self.eval_acc``, ``self.init_train_op``.
    """
    tower_losses = []
    tower_grads = []
    tower_accs = []
    with graph.as_default(), tf.device('/cpu:0'):
        # Snapshot the variable set so we can initialize only the
        # variables the optimizer introduces below.
        variables_before = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        learning_rate_holder = tf.placeholder(
            tf.float32, shape=[], name='learning_rate')
        # Only plain SGD is supported in this multi-tower path.
        assert optimizer_conf['op_type'].lower() == 'sgd'
        opt = tf.train.GradientDescentOptimizer(learning_rate_holder)
        for i in range(self.num_towers):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('Tower_%d' % (i)):
                    # This tower's slice of the batch labels.
                    tower_start_index = i * self.batch_size
                    tower_end_index = (i + 1) * self.batch_size
                    tower_labels_holder = self.labels_holder[
                        tower_start_index:tower_end_index]
                    loss = nnet.loss_dnn(self.tower_logits[i],
                                         tower_labels_holder)
                    tower_losses.append(loss)
                    grads = opt.compute_gradients(loss)
                    tower_grads.append(grads)
                    eval_acc = nnet.evaluation_dnn(self.tower_logits[i],
                                                   tower_labels_holder)
                    tower_accs.append(eval_acc)
        grads = nnet.average_gradients(tower_grads)
        train_op = opt.apply_gradients(grads)
        losses = tf.reduce_sum(tower_losses)
        accs = tf.reduce_sum(tower_accs)
        # Initialize op for variables introduced by the optimizer.
        variables_after = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        new_variables = list(set(variables_after) - set(variables_before))
        init_train_op = tf.variables_initializer(new_variables)
        self.loss = losses
        self.learning_rate_holder = learning_rate_holder
        self.train_op = train_op
        self.eval_acc = accs
        self.init_train_op = init_train_op
def init_training_bn_single(self, graph, optimizer_conf):
    ''' initialize training graph for the "bn" model variant on a
    single device; assumes self.logits, self.labels_holder in place.

    Sets: self.loss, self.learning_rate_holder, self.train_op,
    self.eval_acc.

    NOTE(review): unlike init_training_dnn_single, this does not build
    an init op for variables the optimizer may introduce -- confirm
    callers run a global initializer after this, or that the chosen
    optimizer adds no variables.
    '''
    with graph.as_default():
        loss = nnet.loss_dnn(self.logits, self.labels_holder)
        learning_rate_holder = tf.placeholder(tf.float32, shape=[],
                                              name='learning_rate')
        # nnet.training builds the optimizer and the update op in one call.
        train_op = nnet.training(optimizer_conf, loss,
                                 learning_rate_holder)
        eval_acc = nnet.evaluation_dnn(self.logits, self.labels_holder)
        self.loss = loss
        self.learning_rate_holder = learning_rate_holder
        self.train_op = train_op
        self.eval_acc = eval_acc
def init_training_bn_multi(self, graph, optimizer_conf):
    """Build the multi-GPU training graph for the "bn" model variant.

    One tower per GPU; gradients are averaged on the CPU and applied
    once with plain SGD. Assumes ``self.tower_logits``,
    ``self.labels_holder``, ``self.num_towers`` and ``self.batch_size``
    are in place.

    Sets: ``self.loss``, ``self.eval_acc``,
    ``self.learning_rate_holder``, ``self.train_op``.

    NOTE(review): unlike init_training_dnn_multi, no init op is built
    for optimizer-introduced variables (plain SGD adds none, which may
    be why) -- confirm before supporting other optimizers here.
    """
    tower_losses = []
    tower_grads = []
    tower_accs = []
    with graph.as_default(), tf.device('/cpu:0'):
        learning_rate_holder = tf.placeholder(
            tf.float32, shape=[], name='learning_rate')
        # Only plain SGD is supported in this multi-tower path.
        assert optimizer_conf['op_type'].lower() == 'sgd'
        opt = tf.train.GradientDescentOptimizer(learning_rate_holder)
        for i in range(self.num_towers):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('Tower_%d' % (i)):
                    # This tower's slice of the batch labels.
                    tower_start_index = i * self.batch_size
                    tower_end_index = (i + 1) * self.batch_size
                    tower_labels_holder = self.labels_holder[
                        tower_start_index:tower_end_index]
                    loss = nnet.loss_dnn(self.tower_logits[i],
                                         tower_labels_holder)
                    tower_losses.append(loss)
                    grads = opt.compute_gradients(loss)
                    tower_grads.append(grads)
                    eval_acc = nnet.evaluation_dnn(self.tower_logits[i],
                                                   tower_labels_holder)
                    tower_accs.append(eval_acc)
        grads = nnet.average_gradients(tower_grads)
        train_op = opt.apply_gradients(grads)
        losses = tf.reduce_sum(tower_losses)
        accs = tf.reduce_sum(tower_accs)
        self.loss = losses
        self.eval_acc = accs
        self.learning_rate_holder = learning_rate_holder
        self.train_op = train_op
def init_training_multi(self, graph, optimizer_conf):
    """Build the multi-GPU multi-task (ASR + SID) training graph.

    Each tower's joint loss is
        alpha * asr_loss + beta * sid_loss,
    with the weights fed at run time through placeholders. Gradients
    are averaged across towers on the CPU and applied once.

    Assumes ``self.tower_asr_logits``, ``self.tower_sid_logits``,
    ``self.asr_labels_holder``, ``self.sid_labels_holder``,
    ``self.mask_holder``, ``self.num_towers``, ``self.batch_size`` and
    ``self.beta_holder`` are in place.

    Sets: ``self.loss``, ``self.learning_rate_holder``,
    ``self.train_op``, ``self.asr_eval_acc``, ``self.sid_eval_acc``,
    ``self.init_train_op``.
    """
    tower_losses = []
    tower_grads = []
    tower_asr_accs = []
    tower_sid_accs = []
    with graph.as_default(), tf.device('/cpu:0'):
        # Record variables we have already initialized so that only the
        # optimizer's new variables get an init op below.
        variables_before = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        learning_rate_holder = tf.placeholder(
            tf.float32, shape=[], name='learning_rate')
        if self.alpha_holder is None:
            self.alpha_holder = tf.placeholder(
                tf.float32, shape=[], name='alpha_holder')
        # NOTE(review): self.beta_holder is used below but, unlike
        # alpha_holder, is never created here -- confirm the caller
        # sets it before this method runs.
        opt = nnet.prep_optimizer(optimizer_conf, learning_rate_holder)
        for i in range(self.num_towers):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('Tower_%d' % (i)):
                    # Slice out this tower's share of the batch.
                    tower_start_index = i * self.batch_size
                    tower_end_index = (i + 1) * self.batch_size
                    tower_asr_labels_holder = self.asr_labels_holder[
                        tower_start_index:tower_end_index]
                    tower_sid_labels_holder = self.sid_labels_holder[
                        tower_start_index:tower_end_index]
                    tower_mask_holder = self.mask_holder[
                        tower_start_index:tower_end_index]
                    # NOTE(review): asr_loss is computed without
                    # tower_mask_holder although asr_eval_acc uses it,
                    # and init_training_single passes the mask to
                    # loss_dnn -- confirm this is intentional.
                    asr_loss = nnet.loss_dnn(self.tower_asr_logits[i],
                                             tower_asr_labels_holder)
                    sid_loss = nnet.loss_dnn(self.tower_sid_logits[i],
                                             tower_sid_labels_holder)
                    loss = (self.alpha_holder * asr_loss +
                            self.beta_holder * sid_loss)
                    tower_losses.append(loss)
                    grads = nnet.get_gradients(opt, loss)
                    tower_grads.append(grads)
                    asr_eval_acc = nnet.evaluation_dnn(
                        self.tower_asr_logits[i],
                        tower_asr_labels_holder,
                        tower_mask_holder)
                    sid_eval_acc = nnet.evaluation_dnn(
                        self.tower_sid_logits[i],
                        tower_sid_labels_holder)
                    tower_asr_accs.append(asr_eval_acc)
                    tower_sid_accs.append(sid_eval_acc)
        grads = nnet.average_gradients(tower_grads)
        train_op = nnet.apply_gradients(optimizer_conf, opt, grads)
        losses = tf.reduce_sum(tower_losses)
        asr_accs = tf.reduce_sum(tower_asr_accs)
        sid_accs = tf.reduce_sum(tower_sid_accs)
        # Initialize only the variables that were newly added above.
        variables_after = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        new_variables = list(set(variables_after) - set(variables_before))
        init_train_op = tf.variables_initializer(new_variables)
        self.loss = losses
        self.learning_rate_holder = learning_rate_holder
        self.train_op = train_op
        self.asr_eval_acc = asr_accs
        self.sid_eval_acc = sid_accs
        self.init_train_op = init_train_op
def init_training_single(self, graph, optimizer_conf, learning_rate=None):
    ''' initialize training graph; assumes the per-bucket logits and
    label/mask holders (self.bucket_tr_*) are in place.

    For every bucket this builds the joint, ASR-only and SID-only
    losses and train ops plus per-task accuracies, appending them to
    the self.bucket_tr_* lists. The joint loss is
    alpha * asr_loss + beta * sid_loss with the weights fed through
    placeholders. Also builds self.init_train_op, which initializes
    only the variables introduced here (e.g. by the optimizer).

    NOTE(review): self.beta_holder is used below but, unlike
    alpha_holder, is never created here -- confirm the caller sets it
    before this method runs.
    '''
    with graph.as_default():
        # record variables we have already initialized
        variables_before = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        learning_rate_holder = tf.placeholder(tf.float32, shape=[],
                                              name='learning_rate')
        # A fixed learning_rate argument overrides the feedable
        # placeholder.
        if learning_rate is None:
            opt = nnet.prep_optimizer(optimizer_conf,
                                      learning_rate_holder)
        else:
            opt = nnet.prep_optimizer(optimizer_conf, learning_rate)
        if self.alpha_holder is None:
            self.alpha_holder = tf.placeholder(tf.float32, shape=[],
                                               name='alpha_holder')
        self.learning_rate_holder = learning_rate_holder
        # Per-bucket outputs, filled by the loop below.
        self.bucket_tr_loss = []
        self.bucket_tr_asr_loss = []
        self.bucket_tr_sid_loss = []
        self.bucket_tr_train_op = []
        self.bucket_tr_asr_train_op = []
        self.bucket_tr_sid_train_op = []
        self.bucket_tr_asr_eval_acc = []
        self.bucket_tr_sid_eval_acc = []
        assert len(self.bucket_tr_asr_logits) == len(
            self.bucket_tr_sid_logits)
        for (asr_logits, sid_logits, asr_labels_holder,
                sid_labels_holder, mask_holder) in \
                zip(self.bucket_tr_asr_logits,
                    self.bucket_tr_sid_logits,
                    self.bucket_tr_asr_labels_holders,
                    self.bucket_tr_sid_labels_holders,
                    self.bucket_tr_mask_holders):
            # The ASR loss/accuracy are masked; the SID ones are not.
            asr_loss = nnet.loss_dnn(asr_logits, asr_labels_holder,
                                     mask_holder)
            sid_loss = nnet.loss_dnn(sid_logits, sid_labels_holder)
            loss = self.alpha_holder * asr_loss + \
                self.beta_holder * sid_loss
            # Separate train ops allow joint, ASR-only or SID-only
            # updates to be chosen at run time.
            grads = nnet.get_gradients(opt, loss)
            asr_grads = nnet.get_gradients(opt, asr_loss)
            sid_grads = nnet.get_gradients(opt, sid_loss)
            train_op = nnet.apply_gradients(optimizer_conf, opt, grads)
            asr_train_op = nnet.apply_gradients(optimizer_conf, opt,
                                                asr_grads)
            sid_train_op = nnet.apply_gradients(optimizer_conf, opt,
                                                sid_grads)
            asr_eval_acc = nnet.evaluation_dnn(asr_logits,
                                               asr_labels_holder,
                                               mask_holder)
            sid_eval_acc = nnet.evaluation_dnn(sid_logits,
                                               sid_labels_holder)
            self.bucket_tr_loss.append(loss)
            self.bucket_tr_asr_loss.append(asr_loss)
            self.bucket_tr_sid_loss.append(sid_loss)
            self.bucket_tr_train_op.append(train_op)
            self.bucket_tr_asr_train_op.append(asr_train_op)
            self.bucket_tr_sid_train_op.append(sid_train_op)
            self.bucket_tr_asr_eval_acc.append(asr_eval_acc)
            self.bucket_tr_sid_eval_acc.append(sid_eval_acc)
        # initialize only variables that were newly added above
        variables_after = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        new_variables = list(set(variables_after) - set(variables_before))
        init_train_op = tf.variables_initializer(new_variables)
        self.init_train_op = init_train_op