def benchmark_graph_apply(self):
    """Benchmark graph-mode DenseNet inference on one fixed random batch.

    The model is built inside a fresh ``tf.Graph`` with ``training=False``.
    A few untimed warm-up runs are executed first, then ``timed_steps``
    forward passes are timed and handed to ``self._report`` under the
    label ``'apply'``.
    """
    batch_size = 64
    warmup_steps, timed_steps = 3, 30
    model_options = dict(
        bottleneck=True,
        compression=0.5,
        weight_decay=1e-4,
        dropout_rate=0,
        pool_initial=True,
        include_top=True,
    )

    with tf.Graph().as_default():
        images = tf.placeholder(tf.float32, image_shape(None))
        model = densenet.DenseNet(
            self.depth,
            self.growth_rate,
            self.num_blocks,
            self.output_classes,
            self.num_layers_in_each_block,
            data_format(),
            **model_options)
        predictions = model(images, training=False)
        init = tf.global_variables_initializer()

        with tf.Session() as sess:
            sess.run(init)
            np_images, _ = random_batch(batch_size)
            # The same numpy batch is fed on every step, warm-up and timed.
            feed = {images: np_images}

            for _ in range(warmup_steps):
                sess.run(predictions, feed_dict=feed)

            start = time.time()
            for _ in range(timed_steps):
                sess.run(predictions, feed_dict=feed)
            self._report('apply', start, timed_steps, batch_size)
def test_pool_initial_true(self):
    """Check the output shape of a 4-block DenseNet with pool_initial=True.

    The data format follows GPU availability, and the random input is laid
    out to match it. The model output must have shape
    ``(batch_size, output_classes)``.
    """
    batch_size = 1
    output_classes = 10
    depth, growth_rate, num_blocks = 7, 2, 4
    num_layers_in_each_block = [1, 2, 2, 1]

    if tf.test.is_gpu_available():
        data_format = 'channels_first'
    else:
        data_format = 'channels_last'

    model = densenet.DenseNet(
        depth,
        growth_rate,
        num_blocks,
        output_classes,
        num_layers_in_each_block,
        data_format,
        bottleneck=True,
        compression=0.5,
        weight_decay=1e-4,
        dropout_rate=0,
        pool_initial=True,
        include_top=True)

    # 32x32 inputs with 3 channels; only the channel axis position differs.
    if data_format != 'channels_last':
        input_shape = (batch_size, 3, 32, 32)
    else:
        input_shape = (batch_size, 32, 32, 3)
    rand_input = tf.random_uniform(input_shape)

    output_shape = model(rand_input).shape
    self.assertEqual(output_shape, (batch_size, output_classes))
def testApply(self):
    """Run graph-mode DenseNet inference once and check the output shape.

    Builds the model (``pool_initial=False``) in a fresh graph, feeds one
    random batch through a session, and asserts that the result has shape
    ``[batch_size, output_classes]``.
    """
    batch_size = 1
    output_classes = 10
    depth, growth_rate, num_blocks = 7, 2, 3
    num_layers_in_each_block = -1
    model_options = dict(
        bottleneck=True,
        compression=0.5,
        weight_decay=1e-4,
        dropout_rate=0,
        pool_initial=False,
        include_top=True,
    )

    with tf.Graph().as_default():
        images = tf.placeholder(tf.float32, image_shape(None))
        model = densenet.DenseNet(
            depth,
            growth_rate,
            num_blocks,
            output_classes,
            num_layers_in_each_block,
            data_format(),
            **model_options)
        predictions = model(images, training=False)
        init = tf.global_variables_initializer()

        with tf.Session() as sess:
            sess.run(init)
            np_images, _ = random_batch(batch_size)
            out = sess.run(predictions, feed_dict={images: np_images})

    expected_shape = [batch_size, output_classes]
    self.assertAllEqual(expected_shape, out.shape)
def _benchmark_eager_train(self, label, make_iterator, device_and_format,
                           defun=False, execution_mode=None, compiled=False):
    """Benchmark eager-mode DenseNet training steps for each batch size.

    Args:
        label: Name passed through to ``self._report``.
        make_iterator: Callable that receives an ``(images, labels)`` tuple
            and returns an object exposing ``.next()``.
        device_and_format: Pair ``(device, data_format)``.
        defun: If true, wrap ``model.call`` and ``apply_gradients`` with
            ``tfe.defun``.
        execution_mode: Passed to ``tfe.execution_mode``; when truthy,
            pending async work is awaited before timing boundaries.
        compiled: Forwarded to ``tfe.defun`` as ``compiled``.
    """
    warmup_steps = 3
    timed_steps = 10
    model_options = dict(
        bottleneck=True,
        compression=0.5,
        weight_decay=1e-4,
        dropout_rate=0,
        pool_initial=True,
        include_top=True,
    )

    with tfe.execution_mode(execution_mode):
        device, data_format = device_and_format
        for batch_size in self._train_batch_sizes():
            images, labels = random_batch(batch_size, data_format)
            model = densenet.DenseNet(
                self.depth,
                self.growth_rate,
                self.num_blocks,
                self.output_classes,
                self.num_layers_in_each_block,
                data_format,
                **model_options)
            optimizer = tf.train.GradientDescentOptimizer(0.1)

            if defun:
                model.call = tfe.defun(model.call, compiled=compiled)
                apply_grads = tfe.defun(apply_gradients, compiled=compiled)
            else:
                apply_grads = apply_gradients

            with tf.device(device):
                iterator = make_iterator((images, labels))

                # Untimed warm-up steps.
                for _ in xrange(warmup_steps):
                    batch_images, batch_labels = iterator.next()
                    grads = compute_gradients(
                        model, batch_images, batch_labels)
                    apply_grads(model, optimizer, grads)
                if execution_mode:
                    tfe.async_wait()
                self._force_device_sync()
                gc.collect()

                # Timed steps; wait and sync again before reporting.
                start = time.time()
                for _ in xrange(timed_steps):
                    batch_images, batch_labels = iterator.next()
                    grads = compute_gradients(
                        model, batch_images, batch_labels)
                    apply_grads(model, optimizer, grads)
                if execution_mode:
                    tfe.async_wait()
                self._force_device_sync()
                self._report(label, start, timed_steps, device, batch_size,
                             data_format)
def benchmark_graph_train(self):
    """Benchmark graph-mode DenseNet training at batch sizes 16, 32 and 64.

    For each batch size a fresh graph is built around a repeating dataset
    holding one random batch. The loss is softmax cross-entropy plus the
    sum of ``model.losses``, minimised with gradient descent. Untimed
    warm-up steps precede the timed steps, which are reported through
    ``self._report`` under the label ``'train'``.
    """
    warmup_steps, timed_steps = 5, 10
    model_options = dict(
        bottleneck=True,
        compression=0.5,
        weight_decay=1e-4,
        dropout_rate=0,
        pool_initial=True,
        include_top=True,
    )

    for batch_size in (16, 32, 64):
        with tf.Graph().as_default():
            np_images, np_labels = random_batch(batch_size)
            single_batch = tf.data.Dataset.from_tensors(
                (np_images, np_labels))
            dataset = single_batch.repeat()
            iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
            images, labels = iterator.get_next()

            model = densenet.DenseNet(
                self.depth,
                self.growth_rate,
                self.num_blocks,
                self.output_classes,
                self.num_layers_in_each_block,
                data_format(),
                **model_options)
            logits = model(images, training=True)

            cross_ent = tf.losses.softmax_cross_entropy(
                logits=logits, onehot_labels=labels)
            regularization = tf.add_n(model.losses)
            loss = cross_ent + regularization
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
            train_op = optimizer.minimize(loss)
            init = tf.global_variables_initializer()

            with tf.Session() as sess:
                sess.run(init)
                for _ in range(warmup_steps):
                    sess.run(train_op)

                start = time.time()
                for _ in range(timed_steps):
                    sess.run(train_op)
                self._report('train', start, timed_steps, batch_size)
def _benchmark_eager_apply(self, label, device_and_format, defun=False,
                           execution_mode=None, compiled=False):
    """Benchmark eager-mode DenseNet inference on one fixed random batch.

    Args:
        label: Name passed through to ``self._report``.
        device_and_format: Pair ``(device, data_format)``.
        defun: If true, wrap ``model.call`` with ``tfe.defun``.
        execution_mode: Passed to ``tfe.execution_mode``; when truthy,
            pending async work is awaited after each loop.
        compiled: Forwarded to ``tfe.defun`` as ``compiled``.
    """
    batch_size = 64
    warmup_steps, timed_steps = 5, 30
    model_options = dict(
        bottleneck=True,
        compression=0.5,
        weight_decay=1e-4,
        dropout_rate=0,
        pool_initial=True,
        include_top=True,
    )

    with tfe.execution_mode(execution_mode):
        device, data_format = device_and_format
        model = densenet.DenseNet(
            self.depth,
            self.growth_rate,
            self.num_blocks,
            self.output_classes,
            self.num_layers_in_each_block,
            data_format,
            **model_options)
        if defun:
            model.call = tfe.defun(model.call, compiled=compiled)

        with tf.device(device):
            images, _ = random_batch(batch_size, data_format)

            # Untimed warm-up; each result has .cpu() called on it.
            for _ in xrange(warmup_steps):
                model(images, training=False).cpu()
            if execution_mode:
                tfe.async_wait()
            gc.collect()

            start = time.time()
            for _ in xrange(timed_steps):
                model(images, training=False).cpu()
            if execution_mode:
                tfe.async_wait()
            self._report(label, start, timed_steps, device, batch_size,
                         data_format)
def test_pool_initial_true(self):
    """Check the output shape of a 4-block DenseNet with pool_initial=True.

    Feeds one random ``(batch_size, 32, 32, 3)`` input through the model
    and asserts the output has shape ``(batch_size, output_classes)``.
    """
    batch_size = 1
    output_classes = 10
    depth, growth_rate, num_blocks = 7, 2, 4
    num_layers_in_each_block = [1, 2, 2, 1]
    model_options = dict(
        bottleneck=True,
        compression=0.5,
        weight_decay=1e-4,
        dropout_rate=0,
        pool_initial=True,
        include_top=True,
    )

    model = densenet.DenseNet(
        depth,
        growth_rate,
        num_blocks,
        output_classes,
        num_layers_in_each_block,
        **model_options)

    input_shape = (batch_size, 32, 32, 3)
    rand_input = tf.random_uniform(input_shape)
    output_shape = model(rand_input).shape

    expected_shape = (batch_size, output_classes)
    self.assertEqual(output_shape, expected_shape)