def build_eval_graph(x, y, scope):
    """Build the evaluation graph: clean loss/accuracy plus FGSM-adversarial metrics.

    Args:
        x: input image batch; assumed to lie in [0, 1] (the clip below relies
            on this) -- TODO confirm against the input pipeline.
        y: ground-truth targets accepted by `layers.accuracy`.
        scope: variable scope holding the already-created model weights;
            entered with reuse=True, so no new variables are created here.

    Returns:
        Tuple `(losses, fgsm_perturbation, fgsm_x_adv)`: a dict of named metric
        ops, the raw FGSM perturbation, and the clipped adversarial examples.
    """
    losses = {}
    with tf.variable_scope(scope, reuse=True):
        with tf.name_scope('Logits'):
            logits = train_utils.forward(x, create_summaries=False)
        # Create an op for the loss
        with tf.name_scope('Cross-Entropy-Loss'):
            ce_loss = layers.cross_entropy_loss(logits, y)
            losses['LOSS'] = ce_loss
        # Create op for the accuracy
        with tf.name_scope('Accuracy'):
            acc = layers.accuracy(logits, y)
            losses['ACC'] = acc
            losses['ERR'] = 1.0 - acc
        # Create op for the accuracy against FGSM attacker
        with tf.name_scope('Adversarial-Accuracy'):
            # `loss` is returned by the helper but unused below.
            fgsm_perturbation, loss, raw_grad = train_utils.generate_adversarial_perturbation(
                x, ord='inf', epsilon=FLAGS.eps_fgsm)
            # Compute adversarial examples
            fgsm_x_adv_unclipped = x + fgsm_perturbation
            fgsm_x_adv = tf.clip_by_value(fgsm_x_adv_unclipped,
                                          clip_value_min=0.0,
                                          clip_value_max=1.0)
            fgsm_x_adv = tf.stop_gradient(fgsm_x_adv)  # do not backprop through attack
            adv_logits = train_utils.forward(fgsm_x_adv, create_summaries=False)
            # Collect gradients
            raw_grad = tf.contrib.layers.flatten(raw_grad)
            raw_grad = tf.identity(raw_grad, name='fgsm-grad')
            # NOTE(review): tf.norm here is taken over the whole flattened batch
            # (one scalar), not per-example norms -- confirm this is intended.
            raw_grad_norm = tf.norm(raw_grad, ord='euclidean')
            raw_grad_norm = tf.identity(raw_grad_norm, name='fgsm-grad-l2norm')
            losses['FGSM-GRAD-NORM'] = raw_grad_norm
            # Collect perturbation
            perturbation = tf.contrib.layers.flatten(fgsm_perturbation)
            perturbation = tf.identity(perturbation, name='fgsm-perturbation')
            tf.summary.histogram(perturbation.op.name, perturbation)
            perturbation_norm = tf.norm(perturbation, ord='euclidean')
            perturbation_norm = tf.identity(perturbation_norm,
                                            name='fgsm-perturbation-l2norm')
            losses['FGSM-PERTURBATION-NORM'] = perturbation_norm
            # Collect accuracy
            adv_acc = layers.accuracy(adv_logits, y)
            losses['ADVT-ACC'] = adv_acc
            losses['ADVT-ERR'] = 1.0 - adv_acc
    return losses, fgsm_perturbation, fgsm_x_adv
def __init__(self, input, label, k=1, **kwargs):
    """Accumulating top-k accuracy evaluator.

    Creates two persistent int64 counters (`total`, `correct`) and, per batch,
    adds the batch counts returned by `layers.accuracy` into them. The batch
    accuracy tensor is registered in `self.metrics`. Must be invoked from the
    program's root block.
    """
    super(Accuracy, self).__init__("accuracy", **kwargs)
    main_program = self.helper.main_program
    if main_program.current_block().idx != 0:
        raise ValueError("You can only invoke Evaluator in root block")

    # Persistent accumulators surviving across mini-batches.
    self.total = self.create_state(dtype='int64', shape=[1], suffix='total')
    self.correct = self.create_state(
        dtype='int64', shape=[1], suffix='correct')

    prog_kwargs = {'main_program': main_program}
    batch_total = self.helper.create_tmp_variable(dtype='int')
    batch_correct = self.helper.create_tmp_variable(dtype='int')
    batch_acc = layers.accuracy(
        input=input,
        label=label,
        k=k,
        total=batch_total,
        correct=batch_correct,
        **prog_kwargs)

    # Widen the per-batch counts before accumulating into the int64 states.
    batch_total = layers.cast(x=batch_total, dtype='int64', **prog_kwargs)
    batch_correct = layers.cast(x=batch_correct, dtype='int64', **prog_kwargs)
    layers.sums(input=[self.total, batch_total], out=self.total, **prog_kwargs)
    layers.sums(
        input=[self.correct, batch_correct], out=self.correct, **prog_kwargs)

    self.metrics.append(batch_acc)
def classifier_graph(self):
    """Constructs classifier graph from inputs to classifier loss.

    * Caches the VatxtInput object in `self.cl_inputs`
    * Caches tensors: `cl_embedded`, `cl_logits`, `cl_loss`

    Returns:
      loss: scalar float (classification loss plus weighted adversarial loss,
      gated on the RNN state save).
    """
    cl_inputs = _inputs('train', pretrain=False)
    self.cl_inputs = cl_inputs

    emb = self.layers['embedding'](cl_inputs.tokens)
    self.tensors['cl_embedded'] = emb

    _, next_state, cl_logits, cl_loss = self.cl_loss_from_embedding(
        emb, return_intermediates=True)
    tf.summary.scalar('classification_loss', cl_loss)
    self.tensors['cl_logits'] = cl_logits
    self.tensors['cl_loss'] = cl_loss

    tf.summary.scalar(
        'accuracy',
        layers_lib.accuracy(cl_logits, cl_inputs.labels, cl_inputs.weights))

    reg_coeff = tf.constant(FLAGS.adv_reg_coeff, name='adv_reg_coeff')
    adv_loss = self.adversarial_loss() * reg_coeff
    tf.summary.scalar('adversarial_loss', adv_loss)

    total_loss = cl_loss + adv_loss
    tf.summary.scalar('total_classification_loss', total_loss)

    # Persist the RNN state before the loss can be consumed.
    with tf.control_dependencies([cl_inputs.save_state(next_state)]):
        total_loss = tf.identity(total_loss)
    return total_loss
def _build(self, num_classifiers, learning_rate):
    """Build an ensemble of feed-forward classifiers over 28x28 inputs.

    Member logits are averaged into the ensemble prediction; a pairwise L2
    diversity penalty between member outputs is added to the loss.

    Args:
        num_classifiers: number of ensemble members.
        learning_rate: Adam learning rate.
    """
    # inputs
    self.X = tf.placeholder(tf.float32, [None, 28, 28])
    self.y = tf.placeholder(tf.int32, [None])
    one_hot_y = tf.one_hot(self.y, 10)

    networks = [layers.feedforward(self.X) for _ in range(num_classifiers)]
    self.individual_loss = [layers.loss(net, one_hot_y) for net in networks]
    self.individual_accuracy = [
        layers.accuracy(net, one_hot_y) for net in networks
    ]

    # Ensemble prediction: mean of member logits.
    logits = tf.reduce_mean(tf.stack(networks, axis=-1), axis=-1)

    # BUG FIX / generalization: the penalty previously hard-coded the three
    # pairs networks[0..2], crashing for num_classifiers < 3 and ignoring any
    # extra members. Sum over all unordered pairs instead; for exactly three
    # members this equals the original sum (||a-b|| is symmetric).
    from itertools import combinations
    pair_norms = [tf.norm(a - b) for a, b in combinations(networks, 2)]
    l2_distance = tf.add_n(pair_norms) if pair_norms else tf.constant(0.0)

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=one_hot_y)
    self.loss = tf.reduce_mean(cross_entropy) + 1e-4 * l2_distance

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    self.train_op = optimizer.minimize(self.loss)

    correct_prediction = tf.equal(
        tf.argmax(logits, axis=1), tf.argmax(one_hot_y, axis=1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    self.prediction = tf.argmax(logits, axis=1)
def __init__(self, input, label, k=1, **kwargs):
    """Top-k accuracy evaluator with running `total`/`correct` counters.

    Only valid in the root block of the main program; raises ValueError
    otherwise. Appends the per-batch accuracy tensor to `self.metrics`.
    """
    super(Accuracy, self).__init__("accuracy", **kwargs)
    program = self.helper.main_program
    if program.current_block().idx != 0:
        raise ValueError("You can only invoke Evaluator in root block")

    # Long-lived accumulator states.
    self.total = self.create_state(dtype='int64', shape=[1], suffix='total')
    self.correct = self.create_state(
        dtype='int64', shape=[1], suffix='correct')

    attrs = dict(main_program=program)
    seen = self.helper.create_tmp_variable(dtype='int')
    hits = self.helper.create_tmp_variable(dtype='int')
    acc = layers.accuracy(
        input=input, label=label, k=k, total=seen, correct=hits, **attrs)

    # Accumulate this batch's counts into the running totals (as int64).
    seen = layers.cast(x=seen, dtype='int64', **attrs)
    hits = layers.cast(x=hits, dtype='int64', **attrs)
    layers.sums(input=[self.total, seen], out=self.total, **attrs)
    layers.sums(input=[self.correct, hits], out=self.correct, **attrs)

    self.metrics.append(acc)
def build_eval_graph(x, y, ul_x):
    """Evaluation graph: cross-entropy and accuracy on labeled data.

    `ul_x` (unlabeled batch) is accepted for signature parity but unused.
    """
    logit = forward(x, is_training=False, update_batch_stats=False)
    return {
        'NLL': L.ce_loss(logit, y),
        'Acc': L.accuracy(logit, y),
    }
def build_test_graph(x, y):
    """Test-time graph: NLL and accuracy of the frozen model on (x, y)."""
    test_logit = adt.forward(x, is_training=False, update_batch_stats=False)
    metrics = {}
    metrics['No_NLL'] = L.ce_loss(test_logit, y)
    metrics['No_Acc'] = L.accuracy(test_logit, y)
    return metrics
def build_eval_graph(x, y, ul_x):
    """Evaluation graph with clean, adversarial (AT) and virtual-adversarial
    (VAT) losses.

    Reuses the already-built model variables; `ul_x` is the unlabeled batch
    used for the VAT term.
    """
    metrics = {}
    clean_logit = vat.forward(x, is_training=False, update_batch_stats=False)
    xent = L.ce_loss(clean_logit, y)
    metrics['NLL'] = xent
    metrics['Acc'] = L.accuracy(clean_logit, y)

    # Reuse existing variables for the attack forward passes.
    tf.get_variable_scope().reuse_variables()

    # NOTE(review): is_training=True here while every other pass uses
    # is_training=False -- presumably intentional for the AT term; confirm.
    metrics['AT_loss'] = vat.adversarial_loss(x, y, xent, is_training=True)

    ul_logit = vat.forward(ul_x, is_training=False, update_batch_stats=False)
    metrics['VAT_loss'] = vat.virtual_adversarial_loss(
        ul_x, ul_logit, is_training=False)
    return metrics
def classifier_graph(self):
    """Constructs classifier graph from inputs to classifier loss.

    * Caches the VatxtInput object in `self.cl_inputs`
    * Caches tensors: `cl_embedded`, `cl_logits`, `cl_loss` (plus `inputs`,
      `embedding_weight`, `perturb` and `labels`)

    Returns:
      loss: scalar float.
    """
    inputs = _inputs('train', pretrain=False)
    self.cl_inputs = inputs
    self.tensors['inputs'] = inputs.tokens
    embedded = self.layers['embedding'](inputs.tokens)
    # Expose the embedding matrix itself for downstream consumers.
    self.tensors['embedding_weight'] = self.layers['embedding'].var
    self.tensors['cl_embedded'] = embedded
    _, next_state, logits, loss = self.cl_loss_from_embedding(
        embedded, return_intermediates=True)
    tf.summary.scalar('classification_loss', loss)
    self.tensors['cl_logits'] = logits
    self.tensors['cl_loss'] = loss
    # Perturbation of the embeddings w.r.t. the classification loss.
    self.tensors['perturb'] = self.calculate_perturb(embedded, loss)
    if FLAGS.single_label:
        # Keep only the label/weight of the last timestep of each sequence.
        indices = tf.stack([tf.range(FLAGS.batch_size), inputs.length - 1], 1)
        labels = tf.expand_dims(tf.gather_nd(inputs.labels, indices), 1)
        weights = tf.expand_dims(tf.gather_nd(inputs.weights, indices), 1)
    else:
        labels = inputs.labels
        weights = inputs.weights
    self.tensors['labels'] = labels
    acc = layers_lib.accuracy(logits, labels, weights)
    tf.summary.scalar('accuracy', acc)
    adv_loss = (self.adversarial_loss() * tf.constant(FLAGS.adv_reg_coeff,
                                                      name='adv_reg_coeff'))
    tf.summary.scalar('adversarial_loss', adv_loss)
    total_loss = loss + adv_loss
    # Persist the RNN state before the returned loss can be consumed.
    with tf.control_dependencies([inputs.save_state(next_state)]):
        total_loss = tf.identity(total_loss)
    tf.summary.scalar('total_classification_loss', total_loss)
    return total_loss
def _build(self, num_classifiers, learning_rate):
    """Build an ensemble of convolutional classifiers whose concatenated
    outputs are combined by a bias-free linear layer.

    Stores placeholders, per-member losses/accuracies, the combined loss,
    train op, accuracy and prediction on `self`.
    """
    # inputs
    self.X = tf.placeholder(tf.float32, [None, 28, 28])
    self.y = tf.placeholder(tf.int32, [None])
    one_hot_y = tf.one_hot(self.y, 10)

    networks = [
        layers.convolutional(self.X) for _ in range(num_classifiers)
    ]
    self.individual_loss = [layers.loss(net, one_hot_y) for net in networks]
    self.individual_accuracy = [
        layers.accuracy(net, one_hot_y) for net in networks
    ]

    # Learned combination of all member outputs.
    combined = tf.concat(networks, axis=-1)
    logits = layers.linear(combined, 10, bias=False)

    xent = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=one_hot_y)
    self.loss = tf.reduce_mean(xent)

    self.train_op = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(self.loss)

    hits = tf.equal(tf.argmax(logits, axis=1), tf.argmax(one_hot_y, axis=1))
    self.accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
    self.prediction = tf.argmax(logits, axis=1)
def classifier_graph(self):
    """Constructs classifier graph from inputs to classifier loss.

    * Caches the VatxtInput objects in `self.cl_inputs`
    * Caches tensors: `cl_embedded` (tuple of forward and reverse),
      `cl_logits`, `cl_loss`

    Returns:
      loss: scalar float.
    """
    inputs = _inputs('train', pretrain=False, bidir=True)
    self.cl_inputs = inputs
    f_inputs, _ = inputs

    # Embed both forward and reverse with a shared embedding
    embedded = [self.layers['embedding'](inp.tokens) for inp in inputs]
    self.tensors['cl_embedded'] = embedded

    _, next_states, logits, loss = self.cl_loss_from_embedding(
        embedded, return_intermediates=True)
    tf.summary.scalar('classification_loss', loss)
    self.tensors['cl_logits'] = logits
    self.tensors['cl_loss'] = loss

    # Labels and weights come from the forward reader only.
    acc = layers_lib.accuracy(logits, f_inputs.labels, f_inputs.weights)
    tf.summary.scalar('accuracy', acc)

    adv_loss = (self.adversarial_loss() * tf.constant(FLAGS.adv_reg_coeff,
                                                      name='adv_reg_coeff'))
    tf.summary.scalar('adversarial_loss', adv_loss)

    total_loss = loss + adv_loss
    # NOTE: the summary is recorded before the save_state control dependency
    # is attached (a sibling variant records it after).
    tf.summary.scalar('total_classification_loss', total_loss)

    # Persist both directions' RNN states before the loss can be consumed.
    saves = [
        inp.save_state(state) for (inp, state) in zip(inputs, next_states)
    ]
    with tf.control_dependencies(saves):
        total_loss = tf.identity(total_loss)
    return total_loss
def classifier_graph(self):
    """Constructs classifier graph from inputs to classifier loss.

    * Caches the VatxtInput objects in `self.cl_inputs`
    * Caches tensors: `cl_embedded` (tuple of forward and reverse),
      `cl_logits`, `cl_loss`

    Returns:
      loss: scalar float.
    """
    cl_inputs = _inputs('train', pretrain=False, bidir=True)
    self.cl_inputs = cl_inputs
    fwd, _ = cl_inputs

    # One shared embedding applied to the forward and reverse token streams.
    embedded = [self.layers['embedding'](inp.tokens) for inp in cl_inputs]
    self.tensors['cl_embedded'] = embedded

    _, next_states, logits, loss = self.cl_loss_from_embedding(
        embedded, return_intermediates=True)
    tf.summary.scalar('classification_loss', loss)
    self.tensors['cl_logits'] = logits
    self.tensors['cl_loss'] = loss

    # Accuracy is measured against the forward reader's labels/weights.
    tf.summary.scalar(
        'accuracy', layers_lib.accuracy(logits, fwd.labels, fwd.weights))

    adv_loss = self.adversarial_loss() * tf.constant(
        FLAGS.adv_reg_coeff, name='adv_reg_coeff')
    tf.summary.scalar('adversarial_loss', adv_loss)

    total_loss = loss + adv_loss

    # Persist both directions' RNN states before the loss can be consumed.
    state_saves = [
        inp.save_state(state) for inp, state in zip(cl_inputs, next_states)
    ]
    with tf.control_dependencies(state_saves):
        total_loss = tf.identity(total_loss)
    tf.summary.scalar('total_classification_loss', total_loss)
    return total_loss
def training():
    """Fine-tune VGG16 on CIFAR-10.

    Loads pretrained weights (skipping the fc layers), trains for MAX_STEP
    steps, logs training summaries to `train_log_dir`, validation summaries
    to `val_log_dir`, and checkpoints every 2000 steps.
    """
    pretrained_weights = './pretrain/vgg16.npy'
    data_dir = './data/cifar10_data/cifar-10-batches-bin'
    train_log_dir = './log/train/'
    val_log_dir = './log/val/'

    with tf.name_scope('input'):
        images_train, labels_train = input_data.read_cifar10(
            data_dir, is_train=True, batch_size=BATCH_SIZE, shuffle=True)
        images_val, labels_val = input_data.read_cifar10(
            data_dir, is_train=False, batch_size=BATCH_SIZE, shuffle=False)

    image_holder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    label_holder = tf.placeholder(tf.int32, shape=[BATCH_SIZE, N_CLASSES])

    logits = vgg.VGG16(image_holder, N_CLASSES, 0.8)
    loss = layers.loss(logits, label_holder)
    accuracy = layers.accuracy(logits, label_holder)

    global_steps = tf.Variable(0, name='global_step', trainable=False)
    train_op = layers.optimize(loss, LEARNING_RATE, global_steps)

    saver = tf.train.Saver(tf.global_variables())
    # merge_all must come after every summary op has been created.
    summary_op = tf.summary.merge_all()

    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)
    print('########################## Start Training ##########################')
    # fc layers are trained from scratch; only conv weights come pretrained.
    layers.load_with_skip(pretrained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_summary_writer = tf.summary.FileWriter(train_log_dir, graph=sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, graph=sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            train_images, train_labels = sess.run([images_train, labels_train])
            _, train_loss, train_acc, summary_str = sess.run(
                [train_op, loss, accuracy, summary_op],
                feed_dict={image_holder: train_images,
                           label_holder: train_labels})

            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('step %d, loss = %.4f, accuracy = %.4f%%' %
                      (step, train_loss, train_acc))
                train_summary_writer.add_summary(summary_str, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run([images_val, labels_val])
                # BUG FIX: the validation writer previously logged the
                # *training* summary_str; evaluate summary_op on the
                # validation batch instead.
                val_loss, val_acc, val_summary_str = sess.run(
                    [loss, accuracy, summary_op],
                    feed_dict={image_holder: val_images,
                               label_holder: val_labels})
                print('step %d, val loss = %.2f, val accuracy = %.2f%%' %
                      (step, val_loss, val_acc))
                val_summary_writer.add_summary(val_summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        coord.request_stop()
    coord.request_stop()
    coord.join(threads)
    sess.close()
def accuracy(inp, lab):
    """Wrap L.accuracy with a uniquely numbered op name.

    Bumps the module-level `acc` counter so each call produces a distinct
    'accuracy_<n>' name.
    """
    global acc
    acc = acc + 1
    op_name = 'accuracy_' + str(acc)
    return L.accuracy(inp, lab, op_name)
def main():
    """
    Train a model with the given parameters.

    :return: number of iterations, interval used to measure accuracy, weight
        mean for each iteration and weight, activation mean in each layer for
        each iteration, and accuracies (batch, train and test)
    :rtype: (int, int, {'W_0': numpy.ndarray, ...},
        {'h_0': numpy.ndarray, ...},
        {'batch_accuracies': ..., 'train_accuracies': ..., 'test_accuracies': ...})
    """
    # To be sure ...
    tf.reset_default_graph()

    test_images, test_labels = mnist.load('t10k-images.idx3-ubyte',
                                          't10k-labels.idx1-ubyte')
    train_images, train_labels = mnist.load('train-images.idx3-ubyte',
                                            'train-labels.idx1-ubyte')
    test_images = test_images.reshape(test_images.shape[0], test_images.shape[1],
                                      test_images.shape[2], 1)
    train_images = train_images.reshape(train_images.shape[0], train_images.shape[1],
                                        train_images.shape[2], 1)

    train_dataset = dataset.Dataset(train_images, train_labels, args.batch_size)
    # Used for testing on training and testing set as GPU does not take the
    # full training/testing sets in memory.
    test_train_dataset = dataset.Dataset(train_images, train_labels, 1000)
    test_dataset = dataset.Dataset(test_images, test_labels, 1000)

    with tf.device('/gpu:0'):
        tf.set_random_seed(time.time()*1000)
        if args.cnn:
            x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
        else:
            x = tf.placeholder(tf.float32, shape=[None, 784])
        if args.normalizer_data:
            n_x = normalizer('n_x', x)
        else:
            n_x = x
        y = tf.placeholder(tf.float32, shape=[None, 10])

        # Convolutional layers if requested, otherwise fully connected layers.
        if args.cnn:
            kernel_size = [3, 3, 3, 3, 3, 3, 3]
            channels = [16]
            for i in range(1, args.layers):
                channels.append(max(channels[-1]*2, 64))
            W = []  # weights
            b = []  # biases
            s = []  # pre-activations
            h = []  # activations
            p = []  # pooled activations
            n = [n_x]  # normalized outputs; n[-1] feeds the next layer
            for i in range(args.layers):
                channels_in = channels[i - 1] if i > 0 else 1
                W.append(initializer('W_conv' + str(i),
                                     [kernel_size[i], kernel_size[i], channels_in, channels[i]]))
                b.append(initializers.constant('b_conv' + str(i), [channels[i]],
                                               value=args.bias))
                s.append(layers.convolutional('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                p.append(layers.pooling('p_' + str(i), h[-1]))
                n.append(normalizer('n_' + str(i), p[-1]))
            shape = n[-1].get_shape().as_list()
            n[-1] = tf.reshape(n[-1], [-1, shape[1]*shape[2]*shape[3]])
        else:
            units = [1000, 1000, 1000, 1000, 1000]
            W = []
            b = []
            s = []
            h = []
            n = [n_x]
            for i in range(args.layers):
                units_in = units[i - 1] if i > 0 else 784
                W.append(initializer('W_fc' + str(i), [units_in, units[i]]))
                b.append(initializers.constant('b_fc' + str(i), [units[i]],
                                               value=args.bias))
                s.append(layers.inner_product('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                n.append(normalizer('n_' + str(i), h[-1]))

        # Shared head: 100-unit layer plus 10-way softmax.
        W.append(initializer('W_fc3', [n[-1].get_shape().as_list()[1], 100]))
        b.append(initializers.constant('b_fc3', [100], value=args.bias))
        s.append(layers.inner_product('s_3', n[-1], W[-1], b[-1]))
        h.append(activation('h_3', s[-1]))
        n.append(normalizer('n_3', h[-1]))

        W.append(initializer('W_fc4', [100, 10]))
        b.append(initializers.constant('b_fc4', [10], value=args.bias))
        s.append(layers.inner_product('s_4', n[-1], W[-1], b[-1]))
        y_ = layers.softmax('y_', s[-1])

        # Loss definition and optimizer.
        cross_entropy = layers.cross_entropy('cross_entropy', y_, y)
        weights = [v for v in tf.all_variables() if v.name.startswith('W')]
        loss = cross_entropy + args.regularizer_weight*regularizer('regularizer', weights)
        prediction = layers.prediction('prediction', y_)
        label = layers.label('label', y)
        accuracy = layers.accuracy('accuracy', prediction, label)
        optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

    tformer = transformer.Transformer()
    tformer.convert(np.float32)
    tformer.scale(1./255.)
    # Reshape input if necessary.
    if not args.cnn:
        tformer.reshape([None, 784])

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        sess.run(tf.initialize_all_variables())

        losses = []
        batch_accuracies = []
        train_accuracies = []
        test_accuracies = []

        # Both branches build args.layers + 2 weight tensors (incl. fc3/fc4)
        # and args.layers + 1 activation tensors (incl. h_3).
        n_W = args.layers + 2
        n_h = args.layers + 1
        weight_means = [[] for i in range(n_W)]
        activation_means = [[] for i in range(n_h)]

        iterations = args.epochs*(train_dataset.count//args.batch_size + 1)
        interval = iterations // 20
        for i in range(iterations):
            # If this is the last batch, we reshuffle after this step.
            was_last_batch = train_dataset.next_batch_is_last()
            batch = train_dataset.next_batch()
            res = sess.run([optimizer, cross_entropy, accuracy] + W + h,
                           feed_dict={x: tformer.process(batch[0]), y: batch[1]})

            # res layout: [_, loss, acc, W_0..W_{n_W-1}, h_0..h_{n_h-1}]
            for j in range(n_W):
                weight_means[j].append(np.mean(res[3 + j]))
            for j in range(n_h):
                activation_means[j].append(np.mean(res[3 + n_W + j]))

            losses.append(res[1])
            batch_accuracies.append(res[2])
            print('Loss [%d]: %f' % (i, res[1]))
            print('Batch accuracy [%d]: %f' % (i, res[2]))

            if was_last_batch:
                train_dataset.shuffle()
                train_dataset.reset()

            if i % interval == 0:
                # Accuracy on training set.
                train_accuracy = 0
                batches = test_train_dataset.count // test_train_dataset.batch_size
                for j in range(batches):
                    batch = test_train_dataset.next_batch()
                    train_accuracy += sess.run(
                        accuracy,
                        feed_dict={x: tformer.process(batch[0]), y: batch[1]})
                train_accuracy = train_accuracy / batches
                train_accuracies.append(train_accuracy)
                print('Train accuracy [%d]: %f' % (i, train_accuracy))
                test_train_dataset.reset()

                # Accuracy on testing set.
                test_accuracy = 0
                batches = test_dataset.count // test_dataset.batch_size
                for j in range(batches):
                    batch = test_dataset.next_batch()
                    test_accuracy += sess.run(
                        accuracy,
                        feed_dict={x: tformer.process(batch[0]), y: batch[1]})
                test_accuracy = test_accuracy / batches
                test_accuracies.append(test_accuracy)
                print('Test accuracy [%d]: %f' % (i, test_accuracy))
                test_dataset.reset()

    weights = {}
    # BUG FIX: previously range(args.layers), silently dropping the collected
    # statistics of the final fc3/fc4 weight tensors.
    for j in range(n_W):
        weights['W_' + str(j)] = np.array(weight_means[j])
    activations = {}
    # BUG FIX: previously range(args.layers), dropping the h_3 activations.
    for j in range(n_h):
        activations['h_' + str(j)] = np.array(activation_means[j])
    accuracies = {
        'batch_accuracies': batch_accuracies,
        'train_accuracies': train_accuracies,
        'test_accuracies': test_accuracies
    }
    return iterations, interval, weights, activations, accuracies
def main():
    """
    Train and evaluate the model with the chosen parameters.

    Builds either a convolutional stack (``args.cnn``) or fully connected
    layers, trains with Adam, and periodically writes loss/activation/accuracy
    plots under ``images/``.
    """
    test_images, test_labels = mnist.load('t10k-images.idx3-ubyte',
                                          't10k-labels.idx1-ubyte')
    train_images, train_labels = mnist.load('train-images.idx3-ubyte',
                                            'train-labels.idx1-ubyte')
    test_images = test_images.reshape(test_images.shape[0], test_images.shape[1],
                                      test_images.shape[2], 1)
    train_images = train_images.reshape(train_images.shape[0], train_images.shape[1],
                                        train_images.shape[2], 1)

    train_dataset = dataset.Dataset(train_images, train_labels, args.batch_size)
    # Used for testing on training and testing set as GPU does not take the
    # full training/testing sets in memory.
    test_train_dataset = dataset.Dataset(train_images, train_labels, 1000)
    test_dataset = dataset.Dataset(test_images, test_labels, 1000)

    with tf.device('/gpu:0'):
        if args.cnn:
            x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
        else:
            x = tf.placeholder(tf.float32, shape=[None, 784])
        y = tf.placeholder(tf.float32, shape=[None, 10])

        # Convolutional layers if requested, otherwise fully connected layers.
        if args.cnn:
            kernel_size = [3, 3, 3, 3, 3, 3, 3]
            channels = [16]
            for i in range(1, args.layers):
                channels.append(max(channels[-1] * 2, 64))

            W = []  # weights
            b = []  # biases
            s = []  # pre-activations
            h = []  # activations
            p = []  # pooled activations
            n = [x]  # normalized outputs; n[-1] feeds the next layer
            for i in range(args.layers):
                channels_in = channels[i - 1] if i > 0 else 1
                W.append(initializer('W_conv' + str(i),
                                     [kernel_size[i], kernel_size[i], channels_in, channels[i]]))
                b.append(initializers.constant('b_conv' + str(i), [channels[i]],
                                               value=args.bias))
                s.append(layers.convolutional('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                p.append(layers.pooling('p_' + str(i), h[-1]))
                n.append(normalizer('n_' + str(i), p[-1]))
            shape = n[-1].get_shape().as_list()
            n[-1] = tf.reshape(n[-1], [-1, shape[1] * shape[2] * shape[3]])
        else:
            units = [1000, 1000, 1000, 1000, 1000]
            W = []
            b = []
            s = []
            h = []
            n = [x]
            for i in range(args.layers):
                units_in = units[i - 1] if i > 0 else 784
                W.append(initializer('W_fc' + str(i), [units_in, units[i]]))
                b.append(initializers.constant('b_fc' + str(i), [units[i]],
                                               value=args.bias))
                s.append(layers.inner_product('s_' + str(i), n[-1], W[-1], b[-1]))
                # BUG FIX: the activation was appended to ``n`` instead of
                # ``h``; ``h`` stayed empty so ``h[-1]`` on the next line raised
                # IndexError and the normalizer received the wrong input.
                h.append(activation('h_' + str(i), s[-1]))
                n.append(normalizer('n_' + str(i), h[-1]))

        # Shared head: 100-unit layer plus 10-way softmax.
        W.append(initializer('W_fc3', [n[-1].get_shape().as_list()[1], 100]))
        b.append(initializers.constant('b_fc3', [100], value=args.bias))
        s.append(layers.inner_product('s_3', n[-1], W[-1], b[-1]))
        h.append(activation('h_3', s[-1]))
        n.append(normalizer('n_3', h[-1]))

        W.append(initializer('W_fc4', [100, 10]))
        b.append(initializers.constant('b_fc4', [10], value=args.bias))
        s.append(layers.inner_product('s_4', n[-1], W[-1], b[-1]))
        y_ = layers.softmax('y_', s[-1])

        # Loss definition and optimizer.
        cross_entropy = layers.cross_entropy('cross_entropy', y_, y)
        weights = [v for v in tf.all_variables() if v.name.startswith('W')]
        loss = cross_entropy + args.regularizer_weight * regularizer(
            'regularizer', weights)
        prediction = layers.prediction('prediction', y_)
        label = layers.label('label', y)
        accuracy = layers.accuracy('accuracy', prediction, label)
        optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

        tformer = transformer.Transformer()
        tformer.convert(np.float32)
        tformer.scale(1. / 255.)
        # Reshape input if necessary.
        if not args.cnn:
            tformer.reshape([None, 784])

        with tf.Session(config=tf.ConfigProto(
                log_device_placement=True)) as sess:
            sess.run(tf.initialize_all_variables())

            num_W = len(W)  # args.layers + 2 weight tensors

            # First run to visualize variables and backpropagated gradients.
            batch = train_dataset.next_batch()
            res = sess.run([optimizer] + W + h,
                           feed_dict={x: tformer.process(batch[0]), y: batch[1]})

            # Get all the weights for the plot.
            weights = {}
            for i in range(args.layers + 1):
                weights['W_' + str(i)] = res[1 + i]
            plots.plot_normalized_histograms(
                weights, 'images/' + name + '_weights_t0.png')

            # Get all the activations for the plot.
            activations = {}
            for i in range(args.layers + 1):
                # BUG FIX: activations start after ALL weight tensors
                # (len(W) == args.layers + 2); the old offset of
                # args.layers + 1 plotted the last weight tensor as h_0 and
                # shifted every activation by one slot.
                activations['h_' + str(i)] = res[1 + num_W + i]
            plots.plot_normalized_histograms(
                activations, 'images/' + name + '_activations_t0.png')

            losses = []
            batch_accuracies = []
            train_accuracies = []
            test_accuracies = []
            means = [[] for i in range(args.layers)]
            stds = [[] for i in range(args.layers)]

            iterations = args.epochs * (
                train_dataset.count // args.batch_size + 1)
            interval = iterations // 20
            for i in range(iterations):
                # If this is the last batch, we reshuffle after this step.
                was_last_batch = train_dataset.next_batch_is_last()
                batch = train_dataset.next_batch()
                res = sess.run([optimizer, cross_entropy, accuracy] + h,
                               feed_dict={x: tformer.process(batch[0]),
                                          y: batch[1]})

                # Mean/std of each layer's activations (after normalization).
                for j in range(args.layers):
                    means[j].append(np.mean(res[3 + j]))
                    stds[j].append(np.std(res[3 + j]))

                losses.append(res[1])
                batch_accuracies.append(res[2])
                print('Loss [%d]: %f' % (i, res[1]))
                print('Batch accuracy [%d]: %f' % (i, res[2]))

                if was_last_batch:
                    train_dataset.shuffle()
                    train_dataset.reset()

                if i % interval == 0:
                    # Accuracy on training set.
                    train_accuracy = 0
                    batches = test_train_dataset.count // test_train_dataset.batch_size
                    for j in range(batches):
                        batch = test_train_dataset.next_batch()
                        train_accuracy += sess.run(
                            accuracy,
                            feed_dict={x: tformer.process(batch[0]),
                                       y: batch[1]})
                    train_accuracy = train_accuracy / batches
                    train_accuracies.append(train_accuracy)
                    print('Train accuracy [%d]: %f' % (i, train_accuracy))
                    test_train_dataset.reset()

                    # Accuracy on testing set.
                    test_accuracy = 0
                    batches = test_dataset.count // test_dataset.batch_size
                    for j in range(batches):
                        batch = test_dataset.next_batch()
                        test_accuracy += sess.run(
                            accuracy,
                            feed_dict={x: tformer.process(batch[0]),
                                       y: batch[1]})
                    test_accuracy = test_accuracy / batches
                    test_accuracies.append(test_accuracy)
                    print('Test accuracy [%d]: %f' % (i, test_accuracy))
                    test_dataset.reset()

                    # Plot loss for each iteration.
                    plots.plot_lines(
                        {'loss': (np.arange(0, i + 1), np.array(losses))},
                        'images/' + name + '_loss.png')

                    statistics = {}
                    for j in range(args.layers):
                        statistics['h_' + str(j) + '_mean'] = (
                            np.arange(0, i + 1), np.array(means[j]))
                        statistics['h_' + str(j) + '_std'] = (
                            np.arange(0, i + 1), np.array(stds[j]))
                    # Plot activations for each iteration.
                    plots.plot_lines(statistics,
                                     'images/' + name + '_activations.png')

                    # Plot accuracies (train/test sampled every `interval`).
                    plots.plot_lines(
                        {
                            'batch_accuracy':
                                (np.arange(0, i + 1),
                                 np.array(batch_accuracies)),
                            'train_accuracy':
                                (np.arange(0, i + 1, interval),
                                 np.array(train_accuracies)),
                            'test_accuracy':
                                (np.arange(0, i + 1, interval),
                                 np.array(test_accuracies)),
                        }, 'images/' + name + '_accuracy.png')
def training():
    """Fine-tune VGG16 on the DR dataset with an exponentially decayed
    learning rate.

    Loads pretrained conv weights (skipping fc6/fc7/fc8), trains for MAX_STEP
    steps, logs training and validation summaries, and checkpoints every 2000
    steps (also printing the current learning rate).
    """
    pretrained_weights = './pretrain/vgg16.npy'
    train_log_dir = './log_dr50000/train/'
    val_log_dir = './log_dr50000/val/'

    with tf.name_scope('input'):
        images_train, labels_train = dr5_input.input_data(True, BATCH_SIZE)
        images_val, labels_val = dr5_input.input_data(False, BATCH_SIZE)

    image_holder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    label_holder = tf.placeholder(tf.int32, shape=[BATCH_SIZE, N_CLASSES])

    logits = vgg.VGG16(image_holder, N_CLASSES, 0.5)
    loss = layers.loss(logits, label_holder)
    accuracy = layers.accuracy(logits, label_holder)

    global_steps = tf.Variable(0, name='global_step', trainable=False)
    LEARNING_RATE = tf.train.exponential_decay(start_rate,
                                               global_steps,
                                               decay_steps,
                                               deacy_rate,
                                               staircase=True)
    train_op = layers.optimize(loss, LEARNING_RATE, global_steps)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    # The main thread
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess = tf.InteractiveSession()
    sess.run(init)
    print(
        '########################## Start Training ##########################')
    layers.load_with_skip(pretrained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_summary_writer = tf.summary.FileWriter(train_log_dir, graph=sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, graph=sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            train_images, train_labels = sess.run([images_train, labels_train])
            _, train_loss, train_acc, summary_str = sess.run(
                [train_op, loss, accuracy, summary_op],
                feed_dict={
                    image_holder: train_images,
                    label_holder: train_labels
                })

            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('step %d, loss = %.4f, accuracy = %.4f%%' %
                      (step, train_loss, train_acc))
                train_summary_writer.add_summary(summary_str, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run([images_val, labels_val])
                # BUG FIX: the validation writer previously logged the
                # *training* summary_str; evaluate summary_op on the
                # validation batch instead.
                val_loss, val_acc, val_summary_str = sess.run(
                    [loss, accuracy, summary_op],
                    feed_dict={
                        image_holder: val_images,
                        label_holder: val_labels
                    })
                print('step %d, val loss = %.2f, val accuracy = %.2f%%' %
                      (step, val_loss, val_acc))
                val_summary_writer.add_summary(val_summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                lr = sess.run(LEARNING_RATE)
                print("step %d, learning_rate= %f" % (step, lr))
    except tf.errors.OutOfRangeError:
        coord.request_stop()
    coord.request_stop()
    coord.join(threads)
    sess.close()
def build_training_graph(is_training, X_l, Y_l, ID_l, X_u, ID_u, K, lr, mom,
                         lgc_alpha):
    """Build the LGC (label propagation) or plain supervised training graph.

    Logits are read from the cached prediction matrix `cache_f` (indexed by
    example id) rather than a CNN forward pass; see the disabled
    `sup_classifier` path in history. When FLAGS.loss_func == "lgc", the loss
    combines a graph-Laplacian smoothness term over neighbor pairs (from the
    affinity matrix W and degree vector D) with a scaled supervised MSE term.

    Returns:
        (losses, train_op, assign_to_cache, logit_l, all_Wij); `all_Wij` (the
        flattened neighbor affinities) and a non-None `assign_to_cache` exist
        only on the "lgc" path and are None otherwise.
    """
    CACHE_F = tf.get_variable("cache_f")
    W = tf.get_variable("Affinity_matrix")
    D = tf.get_variable("D")
    assert (K.shape[0] == W.shape[0])
    with tf.variable_scope("CNN", reuse=tf.AUTO_REUSE):
        losses = {}
        logit_l = tf.gather(CACHE_F, ID_l)
        logit_u = tf.gather(CACHE_F, ID_u)
        losses['xent_sup'] = L.ce_loss(logit_l, Y_l)
        losses['mse_sup'] = tf.losses.mean_squared_error(
            tf.nn.softmax(logit_l), Y_l)
        losses['mean_acc'] = L.accuracy(logit_l, Y_l)

        # Concatenate ids and logits
        ids = tf.concat([ID_l, ID_u], 0)
        logits = tf.nn.softmax(tf.concat([logit_l, logit_u], 0))

        # BUG FIX: `tvars` and `all_Wij` were previously bound only inside the
        # "lgc" branch but referenced on the other path / in the return
        # statement, raising NameError whenever FLAGS.loss_func != "lgc".
        tvars = tf.trainable_variables()
        all_Wij = None

        if FLAGS.loss_func == "lgc":
            # Unsupervised smoothness loss over the K nearest-neighbor pairs.
            K_ids = tf.gather(K, ids, axis=0)
            K_ids_i = tf.reshape(K_ids[:, :, 0], [-1])
            K_ids_j = tf.reshape(K_ids[:, :, 1], [-1])
            # Both endpoints' class distributions come from the cache.
            all_F_i = tf.gather(tf.nn.softmax(CACHE_F), K_ids_i, axis=0)
            all_F_j = tf.gather(tf.nn.softmax(CACHE_F), K_ids_j, axis=0)
            all_Wij = tf.reshape(tf.gather(W, ids, axis=0), [-1])
            all_Dii = tf.gather(D, K_ids_i, axis=0)
            all_Djj = tf.gather(D, K_ids_j, axis=0)
            # Broadcast the degree scalars across the class dimension.
            all_Dii = tf.tile(all_Dii[:, None], [1, all_F_i.shape[1]])
            all_Djj = tf.tile(all_Djj[:, None], [1, all_F_j.shape[1]])
            all_Fi = tf.multiply(all_Dii, all_F_i)
            all_Fj = tf.multiply(all_Djj, all_F_j)
            LGC_unsupervised_loss = tf.multiply(
                tf.reduce_sum(tf.square(all_Fi - all_Fj), axis=1), all_Wij)
            losses["lgc_unsupervised_loss"] = tf.reduce_sum(
                LGC_unsupervised_loss)
            losses["lgc_supervised_loss"] = losses['xent_sup']
            # Rescale both terms from batch size to dataset size.
            losses["lgc_unsupervised_loss"] = (
                int(K.shape[0]) / int(FLAGS.batch_size + FLAGS.ul_batch_size)
            ) * losses["lgc_unsupervised_loss"]
            losses["lgc_supervised_loss"] = (
                FLAGS.num_labeled / int(FLAGS.batch_size)) * losses['mse_sup']
            lgc_lamb = 1 / lgc_alpha - 1
            losses["lgc_loss"] = losses["lgc_unsupervised_loss"] + \
                lgc_lamb * losses["lgc_supervised_loss"]

            # Cache write-back ops. NOTE(review): these scatter updates are
            # created but `assign_to_cache` is a no-op, so they never run
            # unless fetched explicitly -- confirm this is intentional.
            assign_op_l = tf.scatter_update(ref=CACHE_F,
                                            indices=ID_l,
                                            updates=tf.nn.softmax(logit_l))
            assign_op_u = tf.scatter_update(ref=CACHE_F,
                                            indices=ID_u,
                                            updates=tf.nn.softmax(logit_u))
            assign_to_cache = tf.no_op()

            # Optimize only the cached prediction matrix itself.
            tvars_CACHE = list(
                filter(lambda v: np.char.find(v.name, "cache") != -1, tvars))
            print([var.name for var in tvars_CACHE])

            opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
            grads_and_vars = opt.compute_gradients(losses['lgc_loss'],
                                                   tvars_CACHE)
            train_op = opt.apply_gradients(
                grads_and_vars,
                global_step=tf.train.get_or_create_global_step())
        else:
            assign_to_cache = None
            opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
            grads_and_vars = opt.compute_gradients(losses['xent_sup'], tvars)
            train_op = opt.apply_gradients(
                grads_and_vars,
                global_step=tf.train.get_or_create_global_step())

    return losses, train_op, assign_to_cache, logit_l, all_Wij