def batch_norm(inputs, is_training_phase):
    """ Batch normalization for fully connected layers.
    Args:
        inputs: 2D Tensor, batch size * layer width
        is_training_phase: boolean tf.Variable, true indicates training phase
    Return:
        normed: batch-normalized Tensor
    """
    with tf.name_scope("batch_norm") as scope:
        batch_mean, batch_var = tf.nn.moments(inputs, [0], name="moments")
        ema = tf.train.ExponentialMovingAverage(decay=0.9)
        ema_apply_op = ema.apply([batch_mean, batch_var])
        ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

        def mean_var_with_update():
            # update the moving averages before returning the batch statistics
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        # batch statistics during training, moving averages at test time
        mean, var = control_flow_ops.cond(is_training_phase,
                                          mean_var_with_update,
                                          lambda: (ema_mean, ema_var))
        std = tf.sqrt(var)
        normed = (inputs - mean) / std
        return normed, mean, std
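Most of the snippets in this collection share the same pattern: compute batch moments, track them with an exponential moving average, and let control_flow_ops.cond pick batch statistics during training and the moving averages at test time. A minimal usage sketch for the fully connected variant above, assuming the TF 0.x/1.x-era API these examples target (the placeholder names and the random batch are illustrative only):

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

# boolean phase flag fed at run time
is_training = tf.placeholder(tf.bool, name='is_training')
x = tf.placeholder(tf.float32, [None, 64])

normed, mean, std = batch_norm(x, is_training)  # the function defined above

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    batch = np.random.randn(32, 64).astype(np.float32)
    # training step: cond() takes the first branch and updates the moving averages
    sess.run(normed, {x: batch, is_training: True})
    # test step: cond() takes the second branch and reads the moving averages
    sess.run(normed, {x: batch, is_training: False})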
def apply(self, x, index, model):
    with tf.name_scope(self.name):
        beta = tf.Variable(tf.constant(0.0, shape=[self.fan_out]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[self.fan_out]),
                            name='gamma', trainable=self.affine)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.9)
        ema_apply_op = ema.apply([batch_mean, batch_var])
        ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

        def mean_var_with_update():
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = control_flow_ops.cond(model.is_training,
                                          mean_var_with_update,
                                          lambda: (ema_mean, ema_var))
        self.h = tf.nn.batch_norm_with_global_normalization(
            x, mean, var, beta, gamma, 1e-3, self.affine)
        return self.h
def batch_norm(x, phase_train, scope='bn', affine=True):
    with tf.variable_scope(scope):
        shape = x.get_shape().as_list()
        beta = tf.Variable(tf.constant(0.0, shape=[shape[-1]]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[shape[-1]]),
                            name='gamma', trainable=affine)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.9)
        ema_apply_op = ema.apply([batch_mean, batch_var])
        ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

        def mean_var_with_update():
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = control_flow_ops.cond(phase_train,
                                          mean_var_with_update,
                                          lambda: (ema_mean, ema_var))
        normed = tf.nn.batch_norm_with_global_normalization(
            x, mean, var, beta, gamma, 1e-3, affine)
        return normed
def batch_norm(x, n_out, phase_train, scope='bn', affine=True):
    """ Batch normalization on convolutional maps.
    Args:
        x: Tensor, 4D BHWD input maps
        n_out: integer, depth of input maps
        phase_train: boolean tf.Variable, true indicates training phase
        scope: string, variable scope
        affine: whether to affine-transform outputs
    Return:
        normed: batch-normalized maps
    """
    with tf.variable_scope(scope):
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),
                            name='gamma', trainable=affine)
        tf.add_to_collection('biases', beta)
        tf.add_to_collection('weights', gamma)

        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.99)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = control_flow_ops.cond(phase_train,
                                          mean_var_with_update,
                                          lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_norm_with_global_normalization(
            x, mean, var, beta, gamma, 1e-3, affine)
        return normed
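A short, hedged usage sketch for the convolutional variant above; everything other than batch_norm (the placeholder, the kernel, and the layer names) is an illustrative assumption:

phase_train = tf.placeholder(tf.bool, name='phase_train')
images = tf.placeholder(tf.float32, [None, 28, 28, 1])

# a hypothetical 3x3 conv producing 16 feature maps
kernel = tf.Variable(tf.truncated_normal([3, 3, 1, 16], stddev=0.1))
conv = tf.nn.conv2d(images, kernel, strides=[1, 1, 1, 1], padding='SAME')

with tf.variable_scope('conv1'):
    bn = batch_norm(conv, 16, phase_train)   # n_out must match the channel depth
act = tf.nn.relu(bn)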
def batch_norm(x, phase_train, name='bn', decay=0.99, reuse=None, affine=True):
    """ Batch normalization on convolutional maps.

    From: https://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
    Only modified to infer shape from input tensor x.

    Parameters
    ----------
    x
        Tensor, 4D BHWD input maps
    phase_train
        boolean tf.Variable, true indicates training phase
    name
        string, variable name
    affine
        whether to affine-transform outputs

    Return
    ------
    normed
        batch-normalized maps
    """
    with tf.variable_scope(name, reuse=reuse):
        og_shape = x.get_shape().as_list()
        if len(og_shape) == 2:
            x = tf.reshape(x, [-1, 1, 1, og_shape[1]])
        shape = x.get_shape().as_list()
        beta = tf.get_variable(name='beta', shape=[shape[-1]],
                               initializer=tf.constant_initializer(0.0),
                               trainable=True)
        gamma = tf.get_variable(name='gamma', shape=[shape[-1]],
                                initializer=tf.constant_initializer(1.0),
                                trainable=affine)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)
        ema_apply_op = ema.apply([batch_mean, batch_var])
        ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

        def mean_var_with_update():
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = control_flow_ops.cond(phase_train,
                                          mean_var_with_update,
                                          lambda: (ema_mean, ema_var))
        # tf.nn.batch_normalization
        normed = tf.nn.batch_norm_with_global_normalization(
            x, mean, var, beta, gamma, 1e-5, affine)
        if len(og_shape) == 2:
            normed = tf.reshape(normed, [-1, og_shape[-1]])
        return normed
def batch_norm_layer(x, n_out, decay=0.9, name='batchnorm', affine=True):
    """Batch normalization on convolutional maps.

    Parameters
    ----------
    x: Tensor, 4D BHWD input maps
    n_out: integer
        depth of input maps
    train_flag: boolean tf.Variable
        true indicates training phase
    name: string
        variable scope
    affine: bool
        whether to affine-transform outputs

    Returns
    -------
    normed
        batch-normalized maps

    Based on the implementation described at http://stackoverflow.com/a/34634291
    """
    train_flag = __get_global('is_training')[0]
    with tf.variable_scope(name):
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),
                            name='gamma', trainable=affine)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)
        ema_apply_op = ema.apply([batch_mean, batch_var])

        def __mean_var_with_update():
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        def __mean_var_without_update():
            return ema.average(batch_mean), ema.average(batch_var)

        mean, var = control_flow_ops.cond(train_flag,
                                          __mean_var_with_update,
                                          __mean_var_without_update)
        normed = tf.nn.batch_norm_with_global_normalization(
            x, mean, var, beta, gamma, 1e-3, affine)
        return normed
def encoder(inputs, noise_std):
    """ encoder """
    z_list = {}
    z_corr_list = {}
    mean_list = {}
    var_list = {}
    h = inputs + tf.random_normal(tf.shape(inputs)) * noise_std
    # labeled
    # u_inputs = tf.slice(inputs, [0, 0], [batch_size, -1])
    # l_inputs = tf.slice(inputs, [batch_size, 0], [-1, -1])
    z_list[0] = inputs
    z_corr_list[0] = h
    for l in range(1, L+1):
        z_pre = tf.matmul(h, weights['W'][l-1])

        def training_BN():
            z_pre_u = tf.slice(z_pre, [0, 0], [batch_size, -1])
            z_pre_l = tf.slice(z_pre, [batch_size, 0], [-1, -1])
            mean_u, var_u = tf.nn.moments(z_pre_u, axes=[0])
            mean_l, var_l = tf.nn.moments(z_pre_l, axes=[0])
            z_u = batch_norm(z_pre_u, mean_u, var_u) + tf.random_normal(tf.shape(z_pre_u)) * noise_std
            z_l = batch_norm(z_pre_l, mean_l, var_l) + tf.random_normal(tf.shape(z_pre_l)) * noise_std
            z = tf.concat(0, [z_u, z_l])
            # mean = tf.concat(0, [mean_u, mean_l])
            # var = tf.concat(0, [var_u, var_l])
            return z, mean_u, var_u

        def test_BN():
            mean, var = tf.nn.moments(z_pre, axes=[0])
            z = batch_norm(z_pre, mean, var)
            return z, mean, var

        z, mean, var = control_flow_ops.cond(training, training_BN, test_BN)
        # z = batch_norm(z_pre, mean, var) + tf.random_normal(tf.shape(inputs)) * noise_std
        if noise_std > 0:
            # corrupted encoder, storing z_pre
            z_corr_list[l] = z_pre
        else:
            # clean encoder
            z_list[l] = z
            mean_list[l] = mean
            var_list[l] = var
        h_pre = tf.mul(weights['gamma'][l-1], z + weights['beta'][l-1])
        if l == L:
            h = tf.nn.softmax(h_pre)
        else:
            h = tf.nn.relu(h_pre)
    mean_list[0] = 0
    var_list[0] = 1
    return h, z_corr_list, z_list, mean_list, var_list
def encoder(inputs, noise_std):
    h = inputs + tf.random_normal(tf.shape(inputs)) * noise_std  # add noise to input
    # d stores the pre-activation, activation, mean and variance for each layer.
    # The data for labeled and unlabeled examples are stored separately.
    d = {}
    d['labeled'] = {'z': {}, 'm': {}, 'v': {}, 'h': {}}
    d['unlabeled'] = {'z': {}, 'm': {}, 'v': {}, 'h': {}}
    d['labeled']['z'][0], d['unlabeled']['z'][0] = split_lu(h)
    for l in range(1, L+1):
        print "Layer ", l, ": ", layer_sizes[l-1], " -> ", layer_sizes[l]
        d['labeled']['h'][l-1], d['unlabeled']['h'][l-1] = split_lu(h)
        z_pre = tf.matmul(h, weights['W'][l-1])  # pre-activation
        z_pre_l, z_pre_u = split_lu(z_pre)  # split labeled and unlabeled examples
        m, v = tf.nn.moments(z_pre_u, axes=[0])

        # if training:
        def training_batch_norm():
            # Training batch normalization:
            # batch normalization for labeled and unlabeled examples is performed separately
            if noise_std > 0:
                # Corrupted encoder: batch normalization + noise
                z = join(batch_normalization(z_pre_l), batch_normalization(z_pre_u, m, v))
                z += tf.random_normal(tf.shape(z_pre)) * noise_std
            else:
                # Clean encoder: batch normalization + update the average mean and variance
                # using batch mean and variance of labeled examples
                z = join(update_batch_normalization(z_pre_l, l),
                         batch_normalization(z_pre_u, m, v))
            return z

        # else:
        def eval_batch_norm():
            # Evaluation batch normalization:
            # obtain average mean and variance and use it to normalize the batch
            mean = ewma.average(running_mean[l-1])
            var = ewma.average(running_var[l-1])
            z = batch_normalization(z_pre, mean, var)
            # Instead of the above statement, the use of the following 2 statements containing a typo
            # consistently produces a 0.2% higher accuracy for unclear reasons.
            # m_l, v_l = tf.nn.moments(z_pre_l, axes=[0])
            # z = join(batch_normalization(z_pre_l, m_l, mean, var), batch_normalization(z_pre_u, mean, var))
            return z

        # perform batch normalization according to value of boolean "training" placeholder:
        z = control_flow_ops.cond(training, training_batch_norm, eval_batch_norm)

        if l == L:
            # use softmax activation in output layer
            h = tf.nn.softmax(weights['gamma'][l-1] * (z + weights["beta"][l-1]))
        else:
            # use ReLU activation in hidden layers
            h = tf.nn.relu(z + weights["beta"][l-1])
        d['labeled']['z'][l], d['unlabeled']['z'][l] = split_lu(z)
        # save mean and variance of unlabeled examples for decoding
        d['unlabeled']['m'][l], d['unlabeled']['v'][l] = m, v
        d['labeled']['h'][l], d['unlabeled']['h'][l] = split_lu(h)
    return h, d
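The ladder-network encoder above relies on helpers and globals that are not shown here: split_lu, join, batch_normalization, update_batch_normalization, plus batch_size, layer_sizes, weights, running_mean, running_var, ewma and bn_assigns. The following is only a plausible reconstruction, consistent with how the encoder calls them; treat every definition as an assumption rather than the original code:

# labeled examples are assumed to occupy the first batch_size rows of each batch
join = lambda l, u: tf.concat(0, [l, u])
labeled = lambda x: tf.slice(x, [0, 0], [batch_size, -1]) if x is not None else x
unlabeled = lambda x: tf.slice(x, [batch_size, 0], [-1, -1]) if x is not None else x
split_lu = lambda x: (labeled(x), unlabeled(x))

def batch_normalization(batch, mean=None, var=None):
    # normalize with the given statistics, or with the batch's own moments
    if mean is None or var is None:
        mean, var = tf.nn.moments(batch, axes=[0])
    return (batch - mean) / tf.sqrt(var + 1e-10)

def update_batch_normalization(batch, l):
    # batch normalize and update the running mean/variance of layer l
    mean, var = tf.nn.moments(batch, axes=[0])
    assign_mean = running_mean[l-1].assign(mean)
    assign_var = running_var[l-1].assign(var)
    bn_assigns.append(ewma.apply([running_mean[l-1], running_var[l-1]]))
    with tf.control_dependencies([assign_mean, assign_var]):
        return (batch - mean) / tf.sqrt(var + 1e-10)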
def batch_norm(x, n_out, phase_train, scope='bn', scope2='bn', affine=True,
               init_beta=None, init_gamma=None, frozen=False, model=None):
    """ Batch normalization on convolutional maps.
    Args:
        x: input tensor, [B, H, W, D]
        n_out: integer, depth of input maps
        phase_train: boolean tf.Variable, true indicates training phase
        scope: string, variable scope
        affine: whether to affine-transform outputs
    Return:
        normed: batch-normalized maps
    """
    trainable = not frozen
    with tf.variable_scope(scope):
        if init_beta is None:
            init_beta = tf.constant(0.0, shape=[n_out])
        if init_gamma is None:
            init_gamma = tf.constant(1.0, shape=[n_out])
        beta = weight_variable(
            [n_out], init_val=init_beta, name='beta', trainable=trainable)
        gamma = weight_variable(
            [n_out], init_val=init_gamma, name='gamma', trainable=trainable)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        # batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2])
        batch_mean.set_shape([n_out])
        batch_var.set_shape([n_out])
        ema = tf.train.ExponentialMovingAverage(decay=0.9)
        ema_apply_op = ema.apply([batch_mean, batch_var])
        ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

        def mean_var_with_update():
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = control_flow_ops.cond(phase_train,
                                          mean_var_with_update,
                                          lambda: (ema_mean, ema_var))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)

        if model is not None:
            for name, param in zip(['beta', 'gamma'], [beta, gamma]):
                key = '{}_{}'.format(scope2, name)
                if key in model:
                    raise Exception('Key exists: {}'.format(key))
                model[key] = param
        return normed, batch_mean, batch_var, ema_mean, ema_var
def CreateInput(self):
    assert self.DataFeed is not None, "This model works with a TFRecords data feed"

    tTrainImageBatch, tTrainLabelBatch = self.DataFeed.TrainingBatches()
    tTestImageBatch, tTestLabelBatch = self.DataFeed.TestingBatches()

    tImages, tLabels = control_flow_ops.cond(
        self.IsTraining,
        lambda: (tTrainImageBatch, tTrainLabelBatch),
        lambda: (tTestImageBatch, tTestLabelBatch))

    self.Input = tImages
    self.Targets = tLabels
    return self.Input, self.Targets
def batch_norm(x, phase_train=True, scope='bn', affine=True):
    """
    ----------
    x
        Tensor, 4D BHWD input maps
    phase_train
        boolean tf.Variable, true indicates training phase
    scope
        string, variable scope
    affine
        whether to affine-transform outputs

    Return
    ------
    normed
        batch-normalized maps
    """
    with tf.variable_scope(scope):
        # static shape is needed to size beta/gamma; tf.shape(x) would return a dynamic tensor
        shape = x.get_shape().as_list()
        beta = tf.Variable(tf.constant(0.0, shape=[shape[-1]]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[shape[-1]]),
                            name='gamma', trainable=affine)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.9)
        ema_apply_op = ema.apply([batch_mean, batch_var])
        ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

        def mean_var_with_update():
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = control_flow_ops.cond(phase_train,
                                          mean_var_with_update,
                                          lambda: (ema_mean, ema_var))
        normed = tf.nn.batch_norm_with_global_normalization(
            x, mean, var, beta, gamma, 1e-3, affine)
        return normed
def conv_batch_norm(x, n_out, phase_train):
    beta_init = tf.constant_initializer(value=0.0, dtype=tf.float32)
    gamma_init = tf.constant_initializer(value=1.0, dtype=tf.float32)
    beta = tf.get_variable("beta", [n_out], initializer=beta_init)
    gamma = tf.get_variable("gamma", [n_out], initializer=gamma_init)

    batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
    ema = tf.train.ExponentialMovingAverage(decay=0.9)
    ema_apply_op = ema.apply([batch_mean, batch_var])
    ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

    def mean_var_with_update():
        with tf.control_dependencies([ema_apply_op]):
            return tf.identity(batch_mean), tf.identity(batch_var)

    mean, var = control_flow_ops.cond(phase_train,
                                      mean_var_with_update,
                                      lambda: (ema_mean, ema_var))
    normed = tf.nn.batch_norm_with_global_normalization(x, mean, var, beta, gamma, 1e-3, True)
    return normed
def loss_and_accuracy_per_gpu(phase_train, scope='gpu_i'):
    # train/test inputs
    train_image_batch, train_label_batch = m.make_train_batch(
        FLAGS.train_tf_path, FLAGS.train_batch_size)
    val_image_batch, val_label_batch = m.make_validation_batch(
        FLAGS.val_tf_path, FLAGS.val_batch_size)
    image_batch, label_batch = control_flow_ops.cond(
        phase_train,
        lambda: (train_image_batch, train_label_batch),
        lambda: (val_image_batch, val_label_batch))

    # model outputs
    logits = m.residual_net(image_batch, FLAGS.residual_net_n, 10, phase_train)

    # total loss
    m.loss(logits, label_batch)
    loss = tf.add_n(tf.get_collection('losses', scope), name='total_loss')
    accuracy = m.accuracy(logits, label_batch)
    tf.scalar_summary('train_loss/' + scope, loss)
    tf.scalar_summary('train_accuracy/' + scope, accuracy)
    return loss, accuracy, logits
def batch_normalize(tensor_in, epsilon=1e-5, convnet=True, decay=0.9,
                    scale_after_normalization=True):
    """Batch Normalization

    Args:
        tensor_in: input Tensor, 4D shape: [batch, in_height, in_width, in_depth].
        epsilon: A float number to avoid being divided by 0.
        decay: decay rate for exponential moving average.
        convnet: Whether this is for convolutional net use. If this is True,
            moments will sum across axis [0, 1, 2]. Otherwise, only [0].
        scale_after_normalization: Whether to scale after normalization.
    """
    shape = tensor_in.get_shape().as_list()
    with tf.variable_scope("batch_norm"):
        gamma = tf.get_variable("gamma", [shape[-1]],
                                initializer=tf.random_normal_initializer(1., 0.02))
        beta = tf.get_variable("beta", [shape[-1]],
                               initializer=tf.constant_initializer(0.))
        ema = tf.train.ExponentialMovingAverage(decay=decay)
        if convnet:
            assign_mean, assign_var = tf.nn.moments(tensor_in, [0, 1, 2])
        else:
            assign_mean, assign_var = tf.nn.moments(tensor_in, [0])
        ema_assign_op = ema.apply([assign_mean, assign_var])
        ema_mean, ema_var = ema.average(assign_mean), ema.average(assign_var)

        def update_mean_var():
            """Internal function that updates mean and variance during training."""
            with tf.control_dependencies([ema_assign_op]):
                return tf.identity(assign_mean), tf.identity(assign_var)

        IS_TRAINING = tf.get_collection("IS_TRAINING")[-1]
        mean, variance = control_flow_ops.cond(IS_TRAINING, update_mean_var,
                                               lambda: (ema_mean, ema_var))
        return tf.nn.batch_norm_with_global_normalization(
            tensor_in, mean, variance, beta, gamma, epsilon,
            scale_after_normalization=scale_after_normalization)
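Unlike the other examples, this helper looks up the phase flag in a graph collection rather than taking it as an argument, so a boolean placeholder has to be registered there before any layer is built. A hedged sketch of that wiring (the placeholder and scope names are illustrative):

is_training = tf.placeholder(tf.bool, name='is_training')
tf.add_to_collection('IS_TRAINING', is_training)  # batch_normalize() reads the last entry

x = tf.placeholder(tf.float32, [None, 32, 32, 16])
with tf.variable_scope('layer1'):
    y = batch_normalize(x, convnet=True)
# feed {is_training: True} during training steps and {is_training: False} at evaluation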
def batch_norm2(x, n_out, phase_train, scope='bn', affine=True):
    """ Batch normalization on convolutional maps.
    http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow/33950177
    Args:
        x: Tensor, 4D BHWD input maps
        n_out: integer, depth (channel) of input maps
        phase_train: boolean tf.Variable, true indicates training phase
        scope: string, variable scope
        affine: whether to affine-transform outputs
    Return:
        normed: batch-normalized maps
    """
    with tf.variable_scope(scope):
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),
                            name='gamma', trainable=affine)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.9)
        ema_apply_op = ema.apply([batch_mean, batch_var])
        ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

        def mean_var_with_update():
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = control_flow_ops.cond(phase_train,
                                          mean_var_with_update,
                                          lambda: (ema_mean, ema_var))
        normed = tf.nn.batch_norm_with_global_normalization(
            x, mean, var, beta, gamma, 1e-3, affine)
        return normed
def batch_norm(x, n_out, phase_train, scope='bn', conv_moments=True, affine=True):
    """ Batch normalization on convolutional maps.
    (From http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow.)

    :param x: 4D tensor, BHWD input maps
    :param n_out: integer, depth of input maps
    :param phase_train: boolean tf.Variable, true indicates training phase
    :param scope: string, variable scope
    :param conv_moments: boolean, true indicates to calculate moments across 3 axes
    :param affine: whether to affine-transform outputs
    :return: batch-normalized maps
    """
    with tf.variable_scope(scope):
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),
                            name='gamma', trainable=affine)
        if conv_moments:
            axes = [0, 1, 2]
        else:
            axes = [0]
        batch_mean, batch_var = tf.nn.moments(x, axes, name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.9)
        ema_apply_op = ema.apply([batch_mean, batch_var])
        ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

        def mean_var_with_update():
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = control_flow_ops.cond(phase_train,
                                          mean_var_with_update,
                                          lambda: (ema_mean, ema_var))
        if not conv_moments:
            # batch_norm_with_global_normalization expects a 4D input
            x = tf.reshape(x, [-1, 1, 1, n_out])
        normed = tf.nn.batch_norm_with_global_normalization(
            x, mean, var, beta, gamma, 1e-4, affine)
        return normed
def train_and_val():
    with tf.Graph().as_default():
        # train/test phase indicator
        phase_train = tf.placeholder(tf.bool, name='phase_train')

        # learning rate is manually set
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # global step
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # train/test inputs
        train_image_batch, train_label_batch = m.make_train_batch(
            FLAGS.train_tf_path, FLAGS.train_batch_size)
        val_image_batch, val_label_batch = m.make_validation_batch(
            FLAGS.val_tf_path, FLAGS.val_batch_size)
        image_batch, label_batch = control_flow_ops.cond(
            phase_train,
            lambda: (train_image_batch, train_label_batch),
            lambda: (val_image_batch, val_label_batch))

        # model outputs
        logits = m.residual_net(image_batch, FLAGS.residual_net_n, 10, phase_train)

        # total loss
        loss = m.loss(logits, label_batch)
        accuracy = m.accuracy(logits, label_batch)
        tf.scalar_summary('train_loss', loss)
        tf.scalar_summary('train_accuracy', accuracy)

        # train one step
        train_op = m.train_op(loss, global_step, learning_rate)

        # saver
        saver = tf.train.Saver(tf.all_variables())

        # start session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        # summary
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, graph_def=sess.graph_def)
        for var in tf.trainable_variables():
            tf.histogram_summary('params/' + var.op.name, var)

        # initialization (TODO: or load)
        init_op = tf.initialize_all_variables()
        print('Initializing...')
        sess.run(init_op, {phase_train.name: True})

        # train loop
        tf.train.start_queue_runners(sess=sess)
        curr_lr = 0.0
        lr_scale = 1.0
        for step in xrange(FLAGS.max_steps):
            # set learning rate manually
            if step <= 32000:
                _lr = lr_scale * 1e-1
            elif step <= 48000:
                _lr = lr_scale * 1e-2
            else:
                _lr = lr_scale * 1e-3
            if curr_lr != _lr:
                curr_lr = _lr
                print('Learning rate set to %f' % curr_lr)

            fetches = [train_op, loss]
            if step % FLAGS.summary_interval == 0:
                fetches += [accuracy, summary_op]
            sess_outputs = sess.run(fetches, {phase_train.name: True,
                                              learning_rate.name: curr_lr})

            if step % FLAGS.summary_interval == 0:
                train_loss_value, train_acc_value, summary_str = sess_outputs[1:]
                print('[%s] Iteration %d, train loss = %f, train accuracy = %f' %
                      (datetime.now(), step, train_loss_value, train_acc_value))
                summary_writer.add_summary(summary_str, step)

            if step > 0 and step % FLAGS.val_interval == 0:
                print('Evaluating...')
                n_val_samples = 10000
                val_batch_size = FLAGS.val_batch_size
                n_val_batch = int(n_val_samples / val_batch_size)
                val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
                val_labels = np.zeros((n_val_samples), dtype=np.int64)
                val_losses = []
                for i in xrange(n_val_batch):
                    fetches = [logits, label_batch, loss]
                    session_outputs = sess.run(fetches, {phase_train.name: False})
                    val_logits[i*val_batch_size:(i+1)*val_batch_size, :] = session_outputs[0]
                    val_labels[i*val_batch_size:(i+1)*val_batch_size] = session_outputs[1]
                    val_losses.append(session_outputs[2])
                pred_labels = np.argmax(val_logits, axis=1)
                val_accuracy = np.count_nonzero(pred_labels == val_labels) / n_val_samples
                val_loss = float(np.mean(np.asarray(val_losses)))
                print('Test accuracy = %f' % val_accuracy)
                val_summary = tf.Summary()
                val_summary.value.add(tag='val_accuracy', simple_value=val_accuracy)
                val_summary.value.add(tag='val_loss', simple_value=val_loss)
                summary_writer.add_summary(val_summary, step)

            if step > 0 and step % FLAGS.save_interval == 0:
                checkpoint_path = os.path.join(FLAGS.log_dir, 'checkpoint')
                saver.save(sess, checkpoint_path, global_step=step)
                print('Checkpoint saved at %s' % checkpoint_path)
def encoder(inputs, noise_std):
    # Add normally-distributed random noise, scaled by noise_std
    h = inputs + tf.random_normal(tf.shape(inputs)) * noise_std
    # d stores the pre-activation, activation, mean and variance for each layer
    d = {}
    # The data for labeled and unlabeled examples are stored separately
    d['labeled'] = {'z': {}, 'm': {}, 'v': {}, 'h': {}}
    d['unlabeled'] = {'z': {}, 'm': {}, 'v': {}, 'h': {}}
    # Set the layer-0 values
    d['labeled']['z'][0], d['unlabeled']['z'][0] = split_lu(h)
    # Iterate over the layers
    for l in range(1, L+1):
        # logic layers start at 1
        current_logic_layer = l
        # data layers start at 0
        current_data_layer = current_logic_layer - 1
        # next data layer
        next_data_layer = current_data_layer + 1
        print "Current Layer ", current_logic_layer, " : ", layer_sizes[current_data_layer], " -> to next layer : ", layer_sizes[next_data_layer]
        d['labeled']['h'][current_data_layer], d['unlabeled']['h'][current_data_layer] = split_lu(h)
        # pre-activation: matrix multiplication before the nonlinearity
        z_pre = tf.matmul(h, weights['W'][l-1])
        # split labeled and unlabeled examples again
        z_pre_l, z_pre_u = split_lu(z_pre)
        # compute the mean and variance of the unlabeled data
        m, v = tf.nn.moments(z_pre_u, axes=[0])

        # training path of batch normalization
        def training_batch_norm():
            # Two encoders are trained: one corrupted with noise and one clean.
            # Batch normalization handles labeled and unlabeled data separately.
            if noise_std > 0:
                # batch-normalize labeled and unlabeled data separately, then join them
                z = join(batch_normalization(z_pre_l), batch_normalization(z_pre_u, m, v))
                # add noise: a random tensor the same size as z_pre, scaled by the noise weight
                z += tf.random_normal(tf.shape(z_pre)) * noise_std
            else:
                # Clean encoder: batch normalization + update the average mean and variance
                # using batch mean and variance of labeled examples; no noise is added
                z = join(update_mean_var_and_batch_normalization(z_pre_l, l),
                         batch_normalization(z_pre_u, m, v))
            return z

        # else: evaluation path
        def eval_batch_norm():
            # Evaluation batch normalization:
            # obtain average mean and variance and use it to normalize the batch
            mean = ewma.average(running_mean[l-1])
            var = ewma.average(running_var[l-1])
            z = batch_normalization(z_pre, mean, var)
            # Instead of the above statement, the use of the following 2 statements containing a typo
            # consistently produces a 0.2% higher accuracy for unclear reasons.
            # m_l, v_l = tf.nn.moments(z_pre_l, axes=[0])
            # z = join(batch_normalization(z_pre_l, m_l, mean, var), batch_normalization(z_pre_u, mean, var))
            return z

        # is_training is a boolean; its value selects the training or evaluation path
        z = control_flow_ops.cond(is_training, training_batch_norm, eval_batch_norm)

        if l == L:
            # the output (last) layer uses softmax activation
            h = tf.nn.softmax(weights['gamma'][l-1] * (z + weights["beta"][l-1]))
        else:
            # hidden layers use the ReLU activation
            h = tf.nn.relu(z + weights["beta"][l-1])
        d['labeled']['z'][l], d['unlabeled']['z'][l] = split_lu(z)
        # save mean and variance of unlabeled examples for decoding
        d['unlabeled']['m'][l], d['unlabeled']['v'][l] = m, v
        d['labeled']['h'][l], d['unlabeled']['h'][l] = split_lu(h)
    return h, d
def train_and_val():
    with tf.Graph().as_default():
        # train/test phase indicator
        phase_train = tf.placeholder(tf.bool, name='phase_train')

        # learning rate is manually set
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')
        tf.scalar_summary('learning_rate', learning_rate)

        # global step
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # train/test inputs
        train_image_batch, train_label_batch = m.make_train_batch(
            FLAGS.train_tf_path, FLAGS.train_batch_size)
        val_image_batch, val_label_batch = m.make_validation_batch(
            FLAGS.val_tf_path, FLAGS.val_batch_size)
        image_batch, label_batch = control_flow_ops.cond(
            phase_train,
            lambda: (train_image_batch, train_label_batch),
            lambda: (val_image_batch, val_label_batch))

        # model outputs
        logits = m.residual_net(image_batch, FLAGS.residual_net_n, 10, phase_train)

        # total loss
        loss = m.loss(logits, label_batch)
        accuracy = m.accuracy(logits, label_batch)
        tf.scalar_summary('train_loss', loss)
        tf.scalar_summary('train_accuracy', accuracy)

        # train one step
        train_op = m.train_op(loss, global_step, learning_rate)

        # saver
        saver = tf.train.Saver(tf.all_variables())

        # start session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        # summary writer
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, graph=sess.graph)

        # initialize parameters or load from a checkpoint
        if FLAGS.load_dir != '':
            # load from checkpoint
            checkpoint = tf.train.get_checkpoint_state(FLAGS.load_dir)
            model_checkpoint_path = checkpoint.model_checkpoint_path
            if checkpoint and model_checkpoint_path:
                saver.restore(sess, model_checkpoint_path)
                print('Model restored from %s' % model_checkpoint_path)
            else:
                raise RuntimeError('Load directory provided but no checkpoint found')
        else:
            init_op = tf.initialize_all_variables()
            print('Initializing...')
            sess.run(init_op, {phase_train.name: True})

        print('Start training...')
        # train loop
        tf.train.start_queue_runners(sess=sess)
        curr_lr = 0.0
        for step in xrange(FLAGS.max_steps):
            # # set learning rate manually
            # if step <= 5000:
            #     _lr = 1e-2
            # elif step <= 32000:
            #     _lr = 1e-1
            # elif step <= 48000:
            #     _lr = 1e-2
            # else:
            #     _lr = 1e-3

            # set learning rate manually
            if step <= 48000:
                _lr = 1e-2
            else:
                _lr = 1e-3
            if curr_lr != _lr:
                curr_lr = _lr
                print('Learning rate set to %f' % curr_lr)

            # train
            fetches = [train_op, loss]
            if step > 0 and step % FLAGS.summary_interval == 0:
                fetches += [accuracy, summary_op]
            sess_outputs = sess.run(fetches, {phase_train.name: True,
                                              learning_rate.name: curr_lr})

            # summary
            if step > 0 and step % FLAGS.summary_interval == 0:
                train_loss_value, train_acc_value, summary_str = sess_outputs[1:]
                print('[%s] Iteration %d, train loss = %f, train accuracy = %f' %
                      (datetime.now(), step, train_loss_value, train_acc_value))
                summary_writer.add_summary(summary_str, step)

            # validation
            if step > 0 and step % FLAGS.val_interval == 0:
                print('Evaluating...')
                n_val_samples = 10000
                val_batch_size = FLAGS.val_batch_size
                n_val_batch = int(n_val_samples / val_batch_size)
                val_logits = np.zeros((n_val_samples, 10), dtype=np.float32)
                val_labels = np.zeros((n_val_samples), dtype=np.int64)
                val_losses = []
                for i in xrange(n_val_batch):
                    fetches = [logits, label_batch, loss]
                    session_outputs = sess.run(fetches, {phase_train.name: False})
                    val_logits[i * val_batch_size:(i + 1) * val_batch_size, :] = session_outputs[0]
                    val_labels[i * val_batch_size:(i + 1) * val_batch_size] = session_outputs[1]
                    val_losses.append(session_outputs[2])
                pred_labels = np.argmax(val_logits, axis=1)
                val_accuracy = np.count_nonzero(pred_labels == val_labels) / n_val_samples
                val_loss = float(np.mean(np.asarray(val_losses)))
                print('Test accuracy = %f' % val_accuracy)
                val_summary = tf.Summary()
                val_summary.value.add(tag='val_accuracy', simple_value=val_accuracy)
                val_summary.value.add(tag='val_loss', simple_value=val_loss)
                summary_writer.add_summary(val_summary, step)

            # save variables
            if step > 0 and step % FLAGS.save_interval == 0:
                checkpoint_path = os.path.join(FLAGS.log_dir, 'checkpoint')
                saver.save(sess, checkpoint_path, global_step=step)
                print('Checkpoint saved at %s' % checkpoint_path)
def BatchNormalization(self, p_tInput, p_nBatchNormMomentum=BATCH_NORMALIZATION_MOMENTUM,
                       p_nBatchNormEpsilon=1e-3, p_bIsScalingWithGamma=True):
    """ Custom implementation of batch normalization layer that will be included
    in the upcoming machine learning framework by P.I.Kaplanoglou """
    assert self.IsTraining is not None, "Control flags for the neural network are not created"

    sLayerName = "BN%d" % (len(self.BatchNormLayers) + 1)
    with tf.variable_scope(sLayerName):
        nInputShape = p_tInput.get_shape().as_list()
        nFeatures = nInputShape[-1]

        tBeta = tf.get_variable("BN_Beta", shape=[nFeatures], dtype=tf.float32,
                                initializer=tf.initializers.constant(0.0), trainable=True)
        #tBeta = tf.Variable(tf.constant(0.0, shape=[nFeatures], dtype=dtype), name='BN_beta', trainable=True)
        #self.FCBiases.append(tBeta)
        if p_bIsScalingWithGamma:
            tGamma = tf.get_variable("BN_Gamma", shape=[nFeatures], dtype=tf.float32,
                                     initializer=tf.initializers.constant(1.0), trainable=True)
            #tGamma = tf.Variable(tf.constant(1.0, shape=[nFeatures], dtype=dtype), name='BN_gamma', trainable=True)
            #self.FCWeights.append(tGamma)
        else:
            tGamma = None

        if len(nInputShape) == 4:
            tBatchMean, tBatchVar = tf.nn.moments(p_tInput, [0, 1, 2], name='BN_moments')
        else:
            tBatchMean, tBatchVar = tf.nn.moments(p_tInput, [0], name='BN_moments')

        tGlobal = tf.train.ExponentialMovingAverage(
            decay=tf.constant(p_nBatchNormMomentum, dtype=tf.float32),
            name="BN_global_moments")

        def batchMomentsWithUpdate():
            tGlobalMomentsUpdateOp = tGlobal.apply([tBatchMean, tBatchVar])
            with tf.control_dependencies([tGlobalMomentsUpdateOp]):
                return tf.identity(tBatchMean), tf.identity(tBatchVar)

        def batchMoments():
            return tf.identity(tBatchMean), tf.identity(tBatchVar)

        tGlobalMomentsUpdateOp = tGlobal.apply([tBatchMean, tBatchVar])
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, tGlobalMomentsUpdateOp)

        tMean, tVar = control_flow_ops.cond(
            self.IsTraining, batchMoments,
            lambda: (tGlobal.average(tBatchMean), tGlobal.average(tBatchVar)))

        tBN = tf.nn.batch_normalization(p_tInput, tMean, tVar, tBeta, tGamma, p_nBatchNormEpsilon)

        self.BatchNormLayers.append([tBN, tBeta, tGamma])
        print("  [%s] Input:%s, Output:%s" % (sLayerName, nInputShape, tBN.get_shape().as_list()))
        return tBN
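One consequence of this variant is that the moving-average update is registered in tf.GraphKeys.UPDATE_OPS instead of being gated through cond(), so the caller has to run those update ops alongside the training op. A minimal sketch of that wiring, assuming the same TF 1.x-era API; loss and the optimizer choice are stand-ins for whatever the surrounding model defines:

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    # the moving means/variances are refreshed every time train_op runs
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)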
def inference(x, phase_train):
    keep_prob_train = tf.constant(0.5, dtype=tf.float32)
    keep_prob_test = tf.constant(1.0, dtype=tf.float32)
    keep_prob = control_flow_ops.cond(phase_train,
                                      lambda: keep_prob_train,
                                      lambda: keep_prob_test)

    # conv1 -> 96x96x16
    with tf.variable_scope('conv1') as scope:
        y = conv2d(x, 3, 16, 3, 1, 'SAME', True, scope='conv')
        _activation_summary(y)
        print(y)

    # Block 1 -> 48x48x32
    with tf.variable_scope('block1') as scope:
        y = batch_norm(y, 16, phase_train, scope='bn')
        y = tf.nn.relu(y, name='relu')
        y = conv2d(y, 16, 32, 3, 2, 'SAME', True, scope='conv1')  # Stride
        y = batch_norm(y, 32, phase_train, scope='bn')
        y = tf.nn.relu(y, name='relu')
        y = tf.nn.dropout(y, keep_prob)
        y = conv2d(y, 32, 32, 3, 1, 'SAME', True, scope='conv2')
        _activation_summary(y)
        print(y)

    # Block 2 -> 24x24x64
    with tf.variable_scope('block2') as scope:
        y = batch_norm(y, 32, phase_train, scope='bn')
        y = tf.nn.relu(y, name='relu')
        y = conv2d(y, 32, 64, 3, 2, 'SAME', True, scope='conv1')  # Stride
        y = batch_norm(y, 64, phase_train, scope='bn')
        y = tf.nn.relu(y, name='relu')
        y = tf.nn.dropout(y, keep_prob)
        y = conv2d(y, 64, 64, 3, 1, 'SAME', True, scope='conv2')
        _activation_summary(y)
        print(y)

    # Block 3 -> 12x12x128
    with tf.variable_scope('block3') as scope:
        y = batch_norm(y, 64, phase_train, scope='bn')
        y = tf.nn.relu(y, name='relu')
        y = conv2d(y, 64, 256, 3, 2, 'SAME', True, scope='conv1')  # Stride
        y = batch_norm(y, 256, phase_train, scope='bn')
        y = tf.nn.relu(y, name='relu')
        y = tf.nn.dropout(y, keep_prob)
        y = conv2d(y, 256, 256, 3, 1, 'SAME', True, scope='conv2')
        _activation_summary(y)
        print(y)

    # Block 4 -> 6x6x256
    #with tf.variable_scope('block4') as scope:
    #    y = batch_norm(y, 128, phase_train, scope='bn')
    #    y = tf.nn.relu(y, name='relu')
    #    y = conv2d(y, 128, 256, 3, 2, 'SAME', True, scope='conv1')  # Stride
    #    y = batch_norm(y, 256, phase_train, scope='bn')
    #    y = tf.nn.relu(y, name='relu')
    #    y = tf.nn.dropout(y, keep_prob)
    #    y = conv2d(y, 256, 256, 3, 1, 'SAME', True, scope='conv2')
    #    _activation_summary(y)
    #print(y)

    with tf.variable_scope('final') as scope:
        y = batch_norm(y, 256, phase_train, scope='bn')
        y = tf.nn.relu(y, name='relu')
        y = tf.nn.avg_pool(y, ksize=[1, 12, 12, 1], strides=[1, 1, 1, 1],
                           padding='VALID', name='avg_pool')
        print(y)
        y = tf.reshape(y, [-1, 256])

        # Linear
        W = _variable_with_weight_decay('weights', [256, 2], 1e-4, 1e-4)
        b = _variable_with_weight_decay('bias', [2], 0.0, 0.0)
        y = tf.matmul(y, W) + b
        _activation_summary(y)
        print(y)

    return y
def train():
    with tf.Graph().as_default():
        phase_train = tf.placeholder(tf.bool, name='phase_train')
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Inputs
        train_image_batch, train_label_batch = input_data.distorted_inputs()
        val_image_batch, val_label_batch = input_data.inputs(True)
        image_batch, label_batch = control_flow_ops.cond(
            phase_train,
            lambda: (train_image_batch, train_label_batch),
            lambda: (val_image_batch, val_label_batch))

        # Model
        logits = m.inference(image_batch, phase_train)

        # Loss
        loss, cross_entropy_mean = m.loss(logits, label_batch)

        # Training
        train_op = m.train(loss, global_step)

        # Saver
        saver = tf.train.Saver(tf.all_variables())

        # Session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement))

        # Summary
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph=sess.graph)

        # Init
        init_op = tf.initialize_all_variables()
        print('Initializing...')
        sess.run(init_op, {phase_train.name: True})

        # Start the queue runners
        tf.train.start_queue_runners(sess=sess)

        # Training loop
        print('Training...')
        for step in xrange(FLAGS.max_steps):
            fetches = [train_op, loss, cross_entropy_mean]
            if step > 0 and step % 100 == 0:
                fetches += [summary_op]

            start_time = time.time()
            sess_outputs = sess.run(fetches, {phase_train.name: True})
            duration = time.time() - start_time

            loss_value, cross_entropy_value = sess_outputs[1:3]

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.4f) '
                              '(%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value, cross_entropy_value,
                                    examples_per_sec, sec_per_batch))

            # Summary
            if step > 0 and step % 100 == 0:
                summary_str = sess_outputs[3]
                summary_writer.add_summary(summary_str, step)

            # Validation
            if step > 0 and step % 1000 == 0:
                n_val_samples = 10000
                val_batch_size = FLAGS.batch_size
                n_val_batch = int(n_val_samples / val_batch_size)
                val_logits = np.zeros((n_val_samples, 2), dtype=np.float32)
                val_labels = np.zeros((n_val_samples), dtype=np.int64)
                val_losses = []
                for i in xrange(n_val_batch):
                    session_outputs = sess.run([logits, label_batch, loss],
                                               {phase_train.name: False})
                    val_logits[i*val_batch_size:(i+1)*val_batch_size, :] = session_outputs[0]
                    val_labels[i*val_batch_size:(i+1)*val_batch_size] = session_outputs[1]
                    val_losses.append(session_outputs[2])
                pred_labels = np.argmax(val_logits, axis=1)
                val_accuracy = np.count_nonzero(pred_labels == val_labels) / (n_val_batch * val_batch_size)
                val_loss = float(np.mean(np.asarray(val_losses)))
                print('Test accuracy = %f' % val_accuracy)
                print('Test loss = %f' % val_loss)
                val_summary = tf.Summary()
                val_summary.value.add(tag='val_accuracy', simple_value=val_accuracy)
                val_summary.value.add(tag='val_loss', simple_value=val_loss)
                summary_writer.add_summary(val_summary, step)

            # Save variables
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def train_and_val():
    with tf.Graph().as_default():
        # train/test phase indicator
        phase_train = tf.placeholder(tf.bool, name='phase_train')

        # learning rate is manually set
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # global step
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # train/test inputs
        train_image_batch, train_label_batch = m.make_train_batch(
            FLAGS.train_tf_path, FLAGS.train_batch_size)
        val_image_batch, val_label_batch = m.make_validation_batch(
            FLAGS.val_tf_path, FLAGS.val_batch_size)
        image_batch, label_batch = control_flow_ops.cond(
            phase_train,
            lambda: (train_image_batch, train_label_batch),
            lambda: (val_image_batch, val_label_batch))

        # model outputs
        logits = m.residual_net(image_batch, FLAGS.residual_net_n, 10, phase_train)

        # total loss
        m.loss(logits, label_batch)
        loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
        m.summary_losses()
        accuracy = m.accuracy(logits, label_batch)
        tf.scalar_summary('train_loss', loss)
        tf.scalar_summary('train_accuracy', accuracy)

        # saver
        saver = tf.train.Saver(tf.all_variables())

        # start session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        # summary
        for var in tf.trainable_variables():
            tf.histogram_summary('params/' + var.op.name, var)

        init_op = tf.initialize_all_variables()
        if FLAGS.restore_path is None:
            # initialization
            print('Initializing...')
            sess.run(init_op, {phase_train.name: True})
        else:
            # restore from previous checkpoint
            sess.run(init_op, {phase_train.name: True})
            print('Restore variable from %s' % FLAGS.restore_path)
            saver.restore(sess, FLAGS.restore_path)

        # train loop
        tf.train.start_queue_runners(sess=sess)
        n_samples = 10000
        batch_size = FLAGS.val_batch_size
        n_iter = int(np.floor(n_samples / batch_size))
        accuracies = []
        losses = []
        for step in xrange(n_iter):
            fetches = [loss, accuracy]
            val_loss, val_acc = sess.run(fetches, {phase_train.name: False})
            losses.append(val_loss)
            accuracies.append(val_acc)
            print('[%s] Iteration %d, val loss = %f, val accuracy = %f' %
                  (datetime.now(), step, val_loss, val_acc))
        val_acc = np.mean(accuracies)
        val_loss = np.mean(losses)
        print('val losses is %f, accuracy is %f' % (val_loss, val_acc))