def cnn_model_fn(features, labels, mode, params):
    """Model function for the Estimator API.

    Inputs:
        -features: batch of input features produced by the input_fn
        -labels: batch of integer class labels
        -mode: a tf.estimator.ModeKeys instance
        -params: dictionary of extra parameters to pass to the function
    """
    model = params['model']
    nclass = params['nclass']
    hidden = params['MLP_hidden']
    logits = model(features, mode, hidden)
    predictions = {
        "classes": tf.argmax(input=logits, axis=1, output_type=tf.int32),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss (for both TRAIN and EVAL modes)
    labels = tf.cast(labels, tf.int32)
    oh = tf.one_hot(labels, nclass)
    xen_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=oh, logits=logits))
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([xen_loss] + reg_losses)

    accuracy = tf.metrics.accuracy(labels, predictions['classes'])
    binary_accuracy = binary_acc(labels, predictions['classes'])

    # Configure the warm-start mapping: variables 'Layer1/'..'Layer12/' and
    # 'ip/' under the chosen branch scope of the checkpoint are mapped onto
    # the corresponding scopes of the current graph.
    if params['warm_start_checkpoint'] is not None:
        warm_start_dict = dict()
        for i in range(1, 13):
            warm_start_dict[params['branch'] + '/Layer' + str(i) + '/'] = 'Layer' + str(i) + '/'
        warm_start_dict[params['branch'] + '/ip/'] = 'ip/'
        tf.contrib.framework.init_from_checkpoint(params['warm_start_checkpoint'],
                                                  warm_start_dict)

    # Configure the training op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()
        learning_rate = tf.train.piecewise_constant(global_step,
                                                    params['boundaries'],
                                                    params['values'])
        tf.summary.scalar('learning_rate', learning_rate)
        optimizer = AdamaxOptimizer(learning_rate)
        tf.summary.scalar('train_accuracy', accuracy[1])  # output to TensorBoard
        # Run the batch-norm moving-average updates together with the train step
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss=loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    else:
        eval_metric_ops = {"valid_accuracy": (accuracy[0], accuracy[1]),
                           "valid_binary_accuracy": binary_accuracy}
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          eval_metric_ops=eval_metric_ops)
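
# A minimal usage sketch (not part of the original file): wiring the model_fn
# above into a tf.estimator.Estimator. The concrete values below -- `MyModel`,
# the class count, the boundaries/values schedule, and `train_input_fn` -- are
# hypothetical placeholders.
classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn,
    model_dir='checkpoints/run0',
    params={
        'model': MyModel,               # hypothetical: (features, mode, hidden) -> logits
        'nclass': 10,
        'MLP_hidden': 256,
        'warm_start_checkpoint': None,  # or a checkpoint path, together with 'branch'
        'branch': 'branch0',
        'boundaries': [10000, 20000],   # piecewise-constant LR schedule
        'values': [1e-3, 1e-4, 1e-5],
    })
classifier.train(input_fn=train_input_fn, steps=1000)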
def _simple_update(self, loss):
    updated_vars, states_assign, vars_assign = self._get_updated_vars(loss)
    # Re-evaluate the loss with the updated variables substituted in
    new_loss = make_with_custom_variables(self._loss_func, updated_vars)
    optimizer = self._internal_optimizer
    if not self._opt_last:
        new_loss = new_loss + loss
    var_list = self._optimizer_vars
    if self._co_opt:
        var_list = self._opt_vars + self._optimizer_vars
    step = optimizer.minimize(new_loss, var_list=var_list)
    if self._train_opt:
        states_assign.append(step)
    states_assign.extend(vars_assign)
    update_ops = states_assign
    return update_ops
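
# Hedged usage sketch: the list returned by _simple_update is a set of ops that
# must all run to advance one meta-training step, so grouping them keeps the
# session call simple. `meta_opt` and `loss` are hypothetical names.
update_ops = meta_opt._simple_update(loss)
meta_step = tf.group(*update_ops)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(100):
        sess.run(meta_step)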
def nn_fit(model_class, data, checkpoint_name, batch_size=32,
           load_checkpoint=None, valid_interval=100,
           optimizer=AdamaxOptimizer(0.0001), log_path='log',
           checkpoint_path='checkpoint', max_iter=1000000,
           num_runner_threads=10, early_stopping=100):
    if not os.path.isdir(log_path):
        os.mkdir(log_path)
    if not os.path.isdir(checkpoint_path):
        os.mkdir(checkpoint_path)
    if not os.path.isdir(checkpoint_path + '/' + checkpoint_name):
        os.mkdir(checkpoint_path + '/' + checkpoint_name)
    # Only keep a requested checkpoint if its files actually exist
    # (tf.train.Saver writes a .index file next to the checkpoint data)
    if load_checkpoint is not None and not os.path.isfile(
            checkpoint_path + '/' + load_checkpoint + '/model.ckpt.index'):
        print("Checkpoint file does not exist. Creating a new one!")
        load_checkpoint = None
    train_cover_files, train_stego_files, \
        valid_cover_files, valid_stego_files = data
    train_ds_size = len(train_cover_files) + len(train_stego_files)
    valid_ds_size = len(valid_cover_files) + len(valid_stego_files)
    train_gen = partial(_train_data_generator, train_cover_files,
                        train_stego_files, True)
    valid_gen = partial(_train_data_generator, valid_cover_files,
                        valid_stego_files, False)
    tf.reset_default_graph()
    train_runner = _GeneratorRunner(train_gen, batch_size * 10)
    valid_runner = _GeneratorRunner(valid_gen, batch_size * 10)
    is_training = tf.get_variable('is_training', dtype=tf.bool,
                                  initializer=True, trainable=False)
    tf_batch_size = tf.get_variable('batch_size', dtype=tf.int32,
                                    initializer=batch_size, trainable=False,
                                    collections=[tf.GraphKeys.LOCAL_VARIABLES])
    disable_training_op = tf.group(tf.assign(is_training, False),
                                   tf.assign(tf_batch_size, batch_size))
    enable_training_op = tf.group(tf.assign(is_training, True),
                                  tf.assign(tf_batch_size, batch_size))
    img_batch, label_batch = queueSelection([valid_runner, train_runner],
                                            tf.cast(is_training, tf.int32),
                                            batch_size)
    model = model_class(is_training)
    model._build_model(img_batch)
    loss, accuracy = model._build_losses(label_batch)
    train_loss_s = _average_summary(loss, 'train_loss', valid_interval)
    train_accuracy_s = _average_summary(accuracy, 'train_accuracy', valid_interval)
    valid_loss_s = _average_summary(loss, 'valid_loss',
                                    float(valid_ds_size) / float(batch_size))
    valid_accuracy_s = _average_summary(accuracy, 'valid_accuracy',
                                        float(valid_ds_size) / float(batch_size))
    global_step = tf.get_variable('global_step', dtype=tf.int32, shape=[],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    minimize_op = optimizer.minimize(loss, global_step)
    train_op = tf.group(minimize_op, train_loss_s.increment_op,
                        train_accuracy_s.increment_op)
    increment_valid = tf.group(valid_loss_s.increment_op,
                               valid_accuracy_s.increment_op)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    saver = tf.train.Saver(max_to_keep=10000)
    with tf.Session() as sess:
        sess.run(init_op)
        if load_checkpoint is not None:
            checkpoint_file = checkpoint_path + '/' + load_checkpoint + '/model.ckpt'
            print("Loading checkpoint", checkpoint_file, "...")
            saver.restore(sess, checkpoint_file)
        train_runner.start_threads(sess, num_runner_threads)
        valid_runner.start_threads(sess, 1)
        writer = tf.summary.FileWriter(log_path, sess.graph)
        start = sess.run(global_step)
        # Initial validation pass before training starts
        sess.run(disable_training_op)
        sess.run([valid_loss_s.reset_variable_op,
                  valid_accuracy_s.reset_variable_op,
                  train_loss_s.reset_variable_op,
                  train_accuracy_s.reset_variable_op])
        _time = time.time()
        for j in range(0, valid_ds_size, batch_size):
            sess.run([increment_valid])
        _acc_val = sess.run(valid_accuracy_s.mean_variable)
        valid_accuracy_s.add_summary(sess, writer, start)
        valid_loss_s.add_summary(sess, writer, start)
        sess.run(enable_training_op)
        early_stopping_cnt = early_stopping
        best_acc = 0.0
        last_val_time = time.time()
        for i in range(start + 1, max_iter + 1):
            sess.run(train_op)
            if i % valid_interval == 0:
                # train
                train_acc = round(sess.run(train_accuracy_s.mean_variable), 4)
                train_loss_s.add_summary(sess, writer, i)
                train_accuracy_s.add_summary(sess, writer, i)
                # validation
                sess.run(disable_training_op)
                for j in range(0, valid_ds_size, batch_size):
                    sess.run([increment_valid])
                valid_acc = round(sess.run(valid_accuracy_s.mean_variable), 4)
                valid_loss_s.add_summary(sess, writer, i)
                valid_accuracy_s.add_summary(sess, writer, i)
                sess.run(enable_training_op)
                # log & checkpoint
                t = round(time.time() - last_val_time)
                print(i, "of", max_iter, ", until ES:", early_stopping_cnt,
                      ", Accuracy:", train_acc, valid_acc, " : ", t, "seconds")
                last_val_time = time.time()
                if valid_acc > best_acc:
                    best_acc = valid_acc
                    saver.save(sess, checkpoint_path + '/' + checkpoint_name +
                               '/model_' + str(round(valid_acc, 4)) + '_' + str(i) + '.ckpt')
                    saver.save(sess, checkpoint_path + '/' + checkpoint_name + '/model.ckpt')
                    early_stopping_cnt = early_stopping
                    if valid_acc >= 1.0:
                        print(i, "Best accuracy: 1.0 : ", t, "seconds")
                        return
                # Early stopping
                if early_stopping_cnt == 0:
                    print("Early stopping condition!")
                    print(i, "Best accuracy:", best_acc, " : ", t, "seconds")
                    return
                early_stopping_cnt -= 1
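
# Hedged usage sketch (names are hypothetical): fitting a cover/stego
# classifier with the training loop above, given four lists of file paths.
data = (train_cover_files, train_stego_files,
        valid_cover_files, valid_stego_files)
nn_fit(MyNet, data, checkpoint_name='mynet_run1',
       batch_size=32, valid_interval=500,
       optimizer=AdamaxOptimizer(0.0001),
       early_stopping=100)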
def cnn_model_fn(features, labels, mode, params):
    """
    NOTE on the Xavier uniform initializer: the default initializer of layers
    in TensorFlow is glorot_uniform_initializer, so there is no need to set
    the initializer explicitly for layers. See lines 232-235 of
    https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/variable_scope.py
    """
    """ Layers: """
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
    conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[3, 3],
                             strides=(1, 1), padding="same", activation=tf.nn.relu)
    conv2 = tf.layers.conv2d(inputs=conv1, filters=32, kernel_size=[5, 5],
                             strides=(2, 2), padding="same", activation=tf.nn.relu)
    conv3 = tf.layers.conv2d(inputs=conv2, filters=64, kernel_size=[3, 3],
                             strides=(1, 1), padding="same", activation=tf.nn.relu)
    conv4 = tf.layers.conv2d(inputs=conv3, filters=64, kernel_size=[5, 5],
                             strides=(2, 2), padding="same", activation=tf.nn.relu)
    conv4_flat = tf.reshape(conv4, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(inputs=conv4_flat, units=1024, activation=tf.nn.relu)
    dropout = tf.layers.dropout(inputs=dense, rate=0.45,
                                training=mode == tf.estimator.ModeKeys.TRAIN)
    logits = tf.layers.dense(inputs=dropout, units=47)

    predictions = {
        # Generate predictions (for PREDICT and EVAL modes)
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # ===== Compute the accuracy and loss of the CNN model on the training dataset =====
    # Use tf.summary.scalar to log data for TensorBoard figure generation
    accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
    tf.identity(accuracy[1], name="train_accuracy")
    tf.summary.scalar("prediction_accuracy", accuracy[1])

    # According to project requirements, the logistic (softmax cross-entropy)
    # loss is used to assess the quality of the CNN.
    # Calculate loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # Configure the training op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = ProjectOptimizer(beta=params["beta"],
                                     learning_rate=params["learning_rate"])
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(labels=labels,
                                        predictions=predictions["classes"])
    }
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      eval_metric_ops=eval_metric_ops,
                                      predictions=predictions)
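
# Hedged usage sketch: the 47 output units suggest an EMNIST-Balanced-style
# task. `ProjectOptimizer` comes from the surrounding project; `train_data`
# and `train_labels` are hypothetical numpy arrays of shape [N, 784] and [N].
classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn, model_dir='cnn_model',
    params={"beta": 0.999, "learning_rate": 1e-3})  # hypothetical values
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": train_data}, y=train_labels,
    batch_size=100, num_epochs=None, shuffle=True)
classifier.train(input_fn=train_input_fn, steps=20000)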
logits = fully_connected(hidden3, n_outputs, scope="outputs", activation_fn=None)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
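
# A hedged completion sketch: the loop above trains but never reports progress
# or saves. Inside the epoch loop one would typically add something like
# (indentation matching the block above):
#
#         acc_val = accuracy.eval(feed_dict={X: mnist.validation.images,
#                                            y: mnist.validation.labels})
#         print(epoch, "Validation accuracy:", acc_val)
#         save_path = saver.save(sess, "./my_model.ckpt")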
def cnn_model_fn(features, labels, mode, params):
    """
    NOTE on the Xavier uniform initializer: the default initializer of layers
    in TensorFlow is glorot_uniform_initializer, so there is no need to set
    the initializer explicitly for layers. See lines 232-235 of
    https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/variable_scope.py
    """
    """ Encoder """
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
    # Now 28x28x1
    conv1 = tf.layers.conv2d(
        inputs=input_layer, filters=32, kernel_size=[5, 5],
        strides=(2, 2), padding="same", activation=tf.nn.relu)
    # Now 14x14x32
    conv2 = tf.layers.conv2d(
        inputs=conv1, filters=64, kernel_size=[5, 5],
        strides=(2, 2), padding="same", activation=tf.nn.relu)
    # Now 7x7x64
    encoded = tf.layers.conv2d(
        inputs=conv2, filters=2, kernel_size=[3, 3],
        strides=(1, 1), padding="same", activation=tf.nn.relu)
    # Now 7x7x2 (the bottleneck)

    """ Decoder """
    conv4 = tf.layers.conv2d_transpose(
        inputs=encoded, filters=64, kernel_size=[3, 3],
        strides=(1, 1), padding="same", activation=tf.nn.relu)
    # Now 7x7x64
    conv5 = tf.layers.conv2d_transpose(
        inputs=conv4, filters=32, kernel_size=[5, 5],
        strides=(2, 2), padding="same", activation=tf.nn.relu)
    # Now 14x14x32
    decodeds = tf.layers.conv2d_transpose(
        inputs=conv5, filters=1, kernel_size=[5, 5],
        strides=(2, 2), padding="same", activation=tf.nn.relu)
    # Now 28x28x1
    decodeds = tf.squeeze(decodeds, [3])
    # Now 28x28

    predictions = {
        # Generate predictions (for PREDICT and EVAL modes)
        "Decoded": decodeds,
        "Features_map": encoded
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # According to project requirements, MSE is used to assess the quality of the CAE
    MSE = tf.metrics.mean_squared_error(labels=labels, predictions=decodeds)
    # Use tf.summary.scalar to log data for TensorBoard figure generation
    tf.identity(MSE[1], name="MSE_training")
    tf.summary.scalar("MSE_training", MSE[1])

    # Calculate loss (for both TRAIN and EVAL modes), a Tensor of loss
    loss = tf.losses.mean_squared_error(labels=labels, predictions=decodeds)

    # Configure the training op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = ProjectOptimizer(
            beta=params["beta"], learning_rate=params["learning_rate"])
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    # According to project requirements, MSE is used to assess the quality of the CAE
    eval_metric_ops = {
        "MSE": tf.metrics.mean_squared_error(labels=labels, predictions=decodeds)
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops,
        predictions=predictions)
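
# Hedged usage sketch: extracting the 7x7x2 bottleneck features and the
# reconstructions from a trained autoencoder. `eval_data` is a hypothetical
# [N, 784] numpy array; for an autoencoder the target equals the input, so
# no labels are needed in PREDICT mode.
cae = tf.estimator.Estimator(model_fn=cnn_model_fn, model_dir='cae_model',
                             params={"beta": 0.999, "learning_rate": 1e-3})
pred_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": eval_data}, shuffle=False)
for p in cae.predict(input_fn=pred_input_fn):
    reconstruction = p["Decoded"]      # 28x28 image
    features_map = p["Features_map"]   # 7x7x2 bottleneck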
def _rnn_update(self, loss, unroll_len):
    x = []
    for i in range(len(self._original_vars)):
        if i in self._omitted_items:
            continue
        x.append(self._original_vars[i])
    initial_states = [self._slot_map[v] for v in x]
    corr_var_updates = []

    def _update(fx, x, state):
        """Parameter and RNN state update."""
        with tf.name_scope("gradients"):
            gradients = tf.gradients(fx, x)
            gradients = [tf.stop_gradient(g) for g in gradients]
            # gradients = [tf.stop_gradient(g) if g is not None else None
            #              for g in gradients]
        with tf.name_scope("deltas"):
            # deltas, state_next = zip(*[self._get_prediction(g, s)
            #                            for g, s in zip(gradients, state)])
            deltas = []
            state_next = []
            for g, s, v in zip(gradients, state, x):
                with ops.colocate_with(s):
                    # NOTE: use a fresh name here; reassigning `state` would
                    # clobber the list of states being iterated over.
                    output, s_next = self._get_prediction(g, s)
                    final_output = output
                    '''
                    delta_dot = tf.sqrt(tf.reduce_sum(output * output))
                    grad_dot = tf.sqrt(tf.reduce_sum(g * g))
                    ratio = tf.cond(grad_dot > 0,
                                    lambda: delta_dot / grad_dot,
                                    lambda: ops.convert_to_tensor(0.0))
                    final_output = tf.cond(ratio > self._delta_ratio,
                                           lambda: output * self._delta_ratio / ratio,
                                           lambda: output)
                    '''
                    deltas.append(final_output)
                    state_next.append(s_next)
                    '''
                    if not self._dynamic_unroll:
                        denominator = grad_dot * delta_dot
                        correlation = tf.cond(denominator > 0,
                                              lambda: tf.reduce_sum(g * output) / denominator,
                                              lambda: ops.convert_to_tensor(0.0))
                        correlation_var = tf.Variable(0.0, trainable=False)
                        smoothed_correlation = (correlation_var * self._corr_smooth +
                                                correlation * (1 - self._corr_smooth))
                        corr_assign = tf.assign(correlation_var, smoothed_correlation,
                                                use_locking=True)
                        corr_var_updates.append(corr_assign)
                        summary.histogram(v.name, v)
                        summary.histogram(v.name + "_gradient", g)
                        summary.histogram(v.name + "_delta", output)
                        summary.histogram(v.name + "_final_delta", final_output)
                        summary.scalar(v.name + "_Gradient/dir correlation", correlation)
                        summary.scalar(v.name + "_Gradient/dir smoothed correlation",
                                       smoothed_correlation)
                        summary.scalar(v.name + "_grad_dot", grad_dot)
                        summary.scalar(v.name + "_delta_dot", delta_dot)
                        summary.scalar(v.name + "_delta_grad_ratio", ratio)
                    '''
            state_next = list(state_next)
        return deltas, state_next

    def _step(t, fx_array, fx, x, state):
        """One statically-unrolled optimization step."""
        x_next = []
        with tf.name_scope("fx"):
            fx_array = fx_array.write(t, fx)
        with tf.name_scope("dx"):
            deltas, state_next = _update(fx, x, state)
        for j in range(len(deltas)):
            with ops.colocate_with(x[j]):
                value = x[j] + deltas[j]
                x_next.append(value)
        curr_vars = self._gen_curr_vars(x_next)
        fx_next = make_with_custom_variables(self._loss_func, curr_vars)
        with tf.name_scope("t_next"):
            t_next = t + 1
        return t_next, fx_array, fx_next, x_next, state_next

    def _dynamic_step(t, fx_array, x, state):
        """One step of the dynamic (tf.while_loop) unroll."""
        x_next = []
        curr_vars = self._gen_curr_vars(x)
        fx = make_with_custom_variables(self._loss_func, curr_vars)
        with tf.name_scope("fx"):
            fx_array = fx_array.write(t, fx)
        with tf.name_scope("dx"):
            deltas, state_next = _update(fx, x, state)
        for j in range(len(deltas)):
            with ops.colocate_with(x[j]):
                value = x[j] + deltas[j]
                x_next.append(value)
        with tf.name_scope("t_next"):
            t_next = t + 1
        return t_next, fx_array, x_next, state_next

    fx_array = tf.TensorArray(tf.float32, size=unroll_len + 1,
                              clear_after_read=False)
    if not self._dynamic_unroll:
        next_x = x
        next_states = initial_states
        t = 0
        fx_next = loss
        for i in range(unroll_len):
            t, fx_array, fx_next, next_x, next_states = _step(
                t, fx_array, fx_next, next_x, next_states)
        x_final = next_x
        s_final = next_states
        loss_final = fx_next
    else:
        _, fx_array, x_final, s_final = tf.while_loop(
            cond=lambda t, *_: t < unroll_len,
            body=_dynamic_step,
            loop_vars=(0, fx_array, x, initial_states),
            parallel_iterations=1,
            swap_memory=True,
            name="unroll")
        curr_vars = self._gen_curr_vars(x_final)
        loss_final = make_with_custom_variables(self._loss_func, curr_vars)
    with tf.name_scope("fx"):
        fx_array = fx_array.write(unroll_len, loss_final)
    if not self._opt_last:
        loss_final = tf.reduce_sum(fx_array.stack(), name="loss")
    update_ops = []
    optimizer = self._internal_optimizer
    var_list = self._optimizer_vars
    if self._co_opt:
        var_list = self._opt_vars + self._optimizer_vars
    step = optimizer.minimize(loss_final, var_list=var_list)
    if self._train_opt:
        update_ops.append(step)
    var_len = len(x_final)
    for i in range(var_len):
        var = x[i]
        updated_var = x_final[i]
        state = initial_states[i]
        updated_state = s_final[i]
        update_ops.append(tf.assign_add(var, (updated_var - var) * self._update_ratio,
                                        use_locking=True))
        update_ops.append(tf.assign(state, updated_state, use_locking=True))
    return update_ops + corr_var_updates
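
# The write-back at the end interpolates rather than overwrites: each variable
# moves only a fraction `update_ratio` of the way toward the unrolled result.
# A minimal standalone sketch of the same pattern:
import tensorflow as tf

var = tf.Variable(1.0)
updated_var = tf.constant(3.0)   # stands in for x_final[i]
update_ratio = 0.5
soft_update = tf.assign_add(var, (updated_var - var) * update_ratio)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(soft_update))  # 2.0: halfway from 1.0 to 3.0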
def cnn_model_fn(features, labels, mode, params):
    """
    NOTE on the Xavier uniform initializer: the default initializer of layers
    in TensorFlow is glorot_uniform_initializer, so there is no need to set
    the initializer explicitly for layers. See lines 232-235 of
    https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/variable_scope.py
    """
    """ Encoder """
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
    # Now 28x28x1
    conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 5],
                             strides=(2, 2), padding="same", activation=tf.nn.relu)
    # Now 14x14x32
    conv2 = tf.layers.conv2d(inputs=conv1, filters=64, kernel_size=[5, 5],
                             strides=(2, 2), padding="same", activation=tf.nn.relu)
    # Now 7x7x64
    encoded = tf.layers.conv2d(inputs=conv2, filters=2, kernel_size=[3, 3],
                               strides=(1, 1), padding="same", activation=tf.nn.relu)
    # Now 7x7x2 (the bottleneck)

    """ Decoder """
    upsample1 = tf.image.resize_images(
        encoded, size=(7, 7), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # Now 7x7x2 (a no-op resize; encoded is already 7x7)
    conv4 = tf.layers.conv2d(inputs=upsample1, filters=64, kernel_size=[3, 3],
                             padding='same', activation=tf.nn.relu)
    # Now 7x7x64
    upsample2 = tf.image.resize_images(
        conv4, size=(14, 14), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # Now 14x14x64
    conv5 = tf.layers.conv2d(inputs=upsample2, filters=32, kernel_size=[5, 5],
                             padding='same', activation=tf.nn.relu)
    # Now 14x14x32
    upsample3 = tf.image.resize_images(
        conv5, size=[28, 28], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # Now 28x28x32
    decodeds = tf.layers.conv2d(inputs=upsample3, filters=1, kernel_size=[5, 5],
                                padding='same', activation=None)
    # Now 28x28x1
    decodeds = tf.squeeze(decodeds, [3])
    # Now 28x28

    predictions = {
        # Generate predictions (for PREDICT and EVAL modes); metric ops cannot
        # go here because `labels` is None in PREDICT mode.
        "Decoded": decodeds,
        "Features_map": encoded
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # According to project requirements, MSE is used to assess the quality of the CAE
    MSE = tf.metrics.mean_squared_error(labels=labels, predictions=decodeds)
    # ===== Compute the accuracy and loss of the CNN model on the training dataset =====
    # Use tf.summary.scalar to log data for TensorBoard figure generation
    tf.identity(MSE[1], name="MSE_training")
    tf.summary.scalar("MSE_training", MSE[1])

    # According to project requirements, the loss is based on MSE
    loss = tf.losses.mean_squared_error(labels=labels, predictions=decodeds)

    # Configure the training op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = ProjectOptimizer(beta=params["B"], learning_rate=params["R"])
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    # According to project requirements, MSE is used to assess the quality of the CAE
    eval_metric_ops = {
        "MSE": tf.metrics.mean_squared_error(labels=labels, predictions=decodeds)
    }
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      eval_metric_ops=eval_metric_ops)
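
# Hedged usage sketch: training the resize-convolution CAE. Note the params
# keys are "B" and "R" as consumed above; `train_data` is a hypothetical
# [N, 784] numpy array, and the autoencoder target equals its input reshaped
# to 28x28 to match the squeezed output.
cae = tf.estimator.Estimator(model_fn=cnn_model_fn, model_dir='cae_resize_model',
                             params={"B": 0.999, "R": 1e-3})  # hypothetical values
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": train_data}, y=train_data.reshape(-1, 28, 28),
    batch_size=100, num_epochs=None, shuffle=True)
cae.train(input_fn=train_input_fn, steps=20000)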