def testLocalTrainOp(self):
    """Trains a 2-clone (CPU) BatchNormClassifier and checks deploy() wiring.

    Verifies that model_deploy.deploy():
      * registers the batch-norm UPDATE_OPS for both clones (2 ops per clone),
      * exposes the expected op names (total_loss, summary_op, train_op),
      * produces a train_op that both reduces the loss and advances the
        batch-norm moving statistics (asserted against hand-computed values).
    """
    g = tf.Graph()
    with g.as_default():
        # Fixed seed so the loss trajectory and moving statistics are
        # deterministic across runs.
        tf.set_random_seed(0)
        tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
        tf_labels = tf.constant(self._labels, dtype=tf.float32)
        model_fn = BatchNormClassifier
        model_args = (tf_inputs, tf_labels)
        # Two clones pinned to CPU so the test runs without GPUs.
        deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                      clone_on_cpu=True)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
        # Sanity check: no variables exist before deploy() builds the model.
        self.assertEqual(slim.get_variables(), [])
        model = model_deploy.deploy(deploy_config, model_fn, model_args,
                                    optimizer=optimizer)
        # 2 batch-norm update ops (mean + variance) per clone = 4 total.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.assertEqual(len(update_ops), 4)
        self.assertEqual(len(model.clones), 2)
        self.assertEqual(model.total_loss.op.name, 'total_loss')
        self.assertEqual(model.summary_op.op.name, 'summary_op/summary_op')
        self.assertEqual(model.train_op.op.name, 'train_op')
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            moving_mean = tf.contrib.framework.get_variables_by_name(
                'moving_mean')[0]
            moving_variance = tf.contrib.framework.get_variables_by_name(
                'moving_variance')[0]
            initial_loss = sess.run(model.total_loss)
            initial_mean, initial_variance = sess.run([moving_mean,
                                                       moving_variance])
            # Freshly initialized batch-norm statistics.
            self.assertAllClose(initial_mean, [0.0, 0.0, 0.0, 0.0])
            self.assertAllClose(initial_variance, [1.0, 1.0, 1.0, 1.0])
            for _ in range(10):
                sess.run(model.train_op)
            final_loss = sess.run(model.total_loss)
            # 10 SGD steps at lr=1.0 should cut the loss by at least 5x.
            self.assertLess(final_loss, initial_loss / 5.0)
            final_mean, final_variance = sess.run([moving_mean,
                                                   moving_variance])
            # Expected moving statistics after 10 updates; presumably derived
            # from self._inputs by hand — values are fixture-specific.
            expected_mean = np.array([0.125, 0.25, 0.375, 0.25])
            expected_var = np.array([0.109375, 0.1875, 0.234375, 0.1875])
            # Adjust for Bessel's correction (sample size 16) when the TF
            # version applies it to the moving variance.
            expected_var = self._addBesselsCorrection(16, expected_var)
            self.assertAllClose(final_mean, expected_mean)
            self.assertAllClose(final_variance, expected_var)
def testNoSummariesOnGPUForEvals(self):
    """Eval-mode deploy() must place every summary input on the CPU."""
    with tf.Graph().as_default():
        deploy_config = model_deploy.DeploymentConfig(num_clones=2)

        def ModelFn():
            # A fully-connected layer with an L2 weight regularizer, so each
            # clone contributes a regularization loss (and thus a summary).
            layer_input = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
            l2_penalty = tf.contrib.layers.l2_regularizer(0.001)
            tf.contrib.layers.fully_connected(
                layer_input, 30, weights_regularizer=l2_penalty)

        # Deploying without an optimizer builds the eval (no-train) graph.
        model = model_deploy.deploy(deploy_config, ModelFn)

        # The merged summary op should have inputs, and every one of them
        # must be pinned to the CPU device.
        merged_inputs = model.summary_op.op.inputs
        self.assertTrue(merged_inputs)
        for merged_input in merged_inputs:
            self.assertEqual('/device:CPU:0', merged_input.device)
def testNoSummariesOnGPU(self):
    """Train-mode deploy() must place every summary input on the CPU."""
    with tf.Graph().as_default():
        deploy_config = model_deploy.DeploymentConfig(num_clones=2)

        def ModelFn():
            # Fully-connected layer whose weights carry an L2 penalty; the
            # 0.5 factor matches keras l2() to the old contrib scaling.
            constant_inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
            weight_penalty = tf.keras.regularizers.l2(0.5 * (0.001))
            tf.contrib.layers.fully_connected(
                constant_inputs, 30, weights_regularizer=weight_penalty)

        model = model_deploy.deploy(
            deploy_config,
            ModelFn,
            optimizer=tf.compat.v1.train.GradientDescentOptimizer(1.0))

        # The merged summary op should have inputs, and every one of them
        # must be pinned to the CPU device.
        summary_inputs = model.summary_op.op.inputs
        self.assertTrue(summary_inputs)
        for summary_input in summary_inputs:
            self.assertEqual('/device:CPU:0', summary_input.device)
def testNoSummariesOnGPUForEvals(self):
    """Eval-mode deploy() must place every summary input on the CPU.

    NOTE(review): this is a near-verbatim duplicate of the earlier
    testNoSummariesOnGPUForEvals (the "eval_bkp" comment suggests a stray
    backup copy). If both definitions live in the same class, this later one
    silently shadows the first — rename or remove one of them.
    """
    with tf.Graph().as_default():
        deploy_config = model_deploy.DeploymentConfig(num_clones=2)

        # Clone function creates a fully_connected layer with a regularizer
        # loss, so each clone contributes a summary.
        def ModelFn():
            inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
            reg = tf.contrib.layers.l2_regularizer(0.001)
            tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg)

        # No optimizer here: deploy() builds the eval graph.
        model = model_deploy.deploy(deploy_config, ModelFn)
        # The model summary op should have a few summary inputs and all of
        # them should be on the CPU.
        self.assertTrue(model.summary_op.op.inputs)
        for inp in model.summary_op.op.inputs:
            self.assertEqual('/device:CPU:0', inp.device)
def _run(args):
    """Builds the multi-clone training/eval graph and runs train or inference.

    Constructs the network via model_deploy (one clone per GPU, or a single
    CPU clone), wires up the input pipeline, metrics, summaries and
    checkpointing, then dispatches to the nested `_train` or `_eval` closure
    depending on `args.inference_only`.

    Args:
        args: parsed command-line namespace; fields read here include
            network, cpu, num_gpus, num_inter/intra_threads, learning_rate,
            min_lr, decay_steps, optimizer, model_dir,
            keep_last_n_checkpoints, inference_only, and the step/interval
            settings used by the closures.
    """
    network = networks.catalogue[args.network](args)

    # Device/session setup: one CPU clone, or one clone per GPU.
    if args.cpu:
        deploy_config = _configure_deployment(num_clones=1, clone_on_cpu=True)
        sess = tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=args.num_inter_threads,
            intra_op_parallelism_threads=args.num_intra_threads))
    else:
        deploy_config = _configure_deployment(num_clones=args.num_gpus,
                                              clone_on_cpu=False)
        sess = tf.Session(config=_configure_session(
            inter=args.num_inter_threads,
            intra=args.num_intra_threads))

    with tf.device(deploy_config.variables_device()):
        global_step = tf.train.create_global_step()

    with tf.device(deploy_config.optimizer_device()):
        # Linear (power=1) decay from learning_rate to min_lr over
        # decay_steps, restarting each cycle.
        lr = tf.train.polynomial_decay(args.learning_rate,
                                       global_step=global_step,
                                       end_learning_rate=args.min_lr,
                                       decay_steps=args.decay_steps,
                                       power=1,
                                       cycle=True)
        # Crude prefix dispatch: 'a*' -> Adam, 'r*' -> RMSProp,
        # anything else -> Nesterov momentum.
        if args.optimizer.startswith('a'):
            optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        elif args.optimizer.startswith('r'):
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=lr,
                momentum=0.9,
                epsilon=1.0,
            )
        else:
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=lr,
                momentum=0.9,
                use_nesterov=True,
            )

    '''Inputs'''
    with tf.device(deploy_config.inputs_device()), tf.name_scope('inputs'):
        pipeline = inputs.Pipeline(args, sess)
        examples, labels = pipeline.data
        images = examples['image']
        # Shard the batch so each clone processes an equal slice.
        image_splits = tf.split(value=images,
                                num_or_size_splits=deploy_config.num_clones,
                                name='split_images')
        label_splits = tf.split(value=labels,
                                num_or_size_splits=deploy_config.num_clones,
                                name='split_labels')

    '''Model Creation'''
    model_dp = model_deploy.deploy(
        config=deploy_config,
        model_fn=_clone_fn,
        optimizer=optimizer,
        kwargs={
            'images': image_splits,
            'labels': label_splits,
            # Each _clone_fn call pops its own shard index from this iterator.
            'index_iter': iter(range(deploy_config.num_clones)),
            'network': network,
            # NOTE(review): is_training is hard-coded to False (the
            # pipeline.is_training hookup is commented out) — confirm this is
            # intentional for the training path, not just the benchmark.
            'is_training': False  # pipeline.is_training
        })

    '''Metrics'''
    with tf.name_scope('outputs'):
        train_metrics = metrics.Metrics(
            labels=labels,
            clone_logits=[clone.outputs['logits']
                          for clone in model_dp.clones],
            clone_predictions=[clone.outputs['predictions']
                               for clone in model_dp.clones],
            device=deploy_config.variables_device(),
            name='training')
        # padded_data=True: the last validation batch may be padded.
        validation_metrics = metrics.Metrics(
            labels=labels,
            clone_logits=[clone.outputs['logits']
                          for clone in model_dp.clones],
            clone_predictions=[clone.outputs['predictions']
                               for clone in model_dp.clones],
            device=deploy_config.variables_device(),
            name='validation',
            padded_data=True)
        # Re-arms the validation dataset and clears its metric accumulators.
        validation_init_op = tf.group(pipeline.validation_iterator.initializer,
                                      validation_metrics.reset_op)
        # One step = apply gradients + accumulate training metrics.
        train_op = tf.group(model_dp.train_op, train_metrics.update_op)

    '''Summaries'''
    with tf.device(deploy_config.variables_device()):
        train_writer = tf.summary.FileWriter(args.model_dir, sess.graph)
        eval_dir = os.path.join(args.model_dir, 'eval')
        eval_writer = tf.summary.FileWriter(eval_dir, sess.graph)
        tf.summary.scalar('accuracy', train_metrics.accuracy)
        tf.summary.scalar('loss', model_dp.total_loss)
        tf.summary.scalar('learning_rate', lr)
        all_summaries = tf.summary.merge_all()

    '''Model Checkpoints'''
    saver = tf.train.Saver(max_to_keep=args.keep_last_n_checkpoints)
    save_path = os.path.join(args.model_dir, 'model.ckpt')

    '''Model Initialization'''
    # Resume from the newest checkpoint in model_dir if one exists;
    # otherwise start from freshly initialized variables.
    # NOTE(review): both branches run the same init_op; only the
    # saver.restore call differs — could be collapsed.
    last_checkpoint = tf.train.latest_checkpoint(args.model_dir)
    if last_checkpoint:
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        saver.restore(sess, last_checkpoint)
    else:
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
    starting_step = sess.run(global_step)

    '''Save pb graph that C++ can load and run.'''
    tf.train.write_graph(sess.graph_def, './graph/', 'sqz.pb', False)

    def _eval(args):
        """Runs timed validation steps and prints a throughput summary.

        The first step is treated as warm-up and excluded from the stats.
        Every args.timeline-th step (when > 0) is traced and dumped as a
        Chrome trace JSON under timeline/.
        """
        dtime = []  # per-step wall-clock durations, in seconds
        for i in range(args.eval_steps):
            try:
                if args.timeline > 0 and i > 0 and i % args.timeline == 0:
                    # Traced step: collect full RunMetadata for the timeline.
                    options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    t0 = time.time()
                    sess.run(
                        fetches=validation_metrics.update_op,
                        feed_dict=pipeline.validation_data,
                        options=options,
                        run_metadata=run_metadata,
                    )
                    duration = time.time() - t0
                    fetched_timeline = timeline.Timeline(
                        run_metadata.step_stats)
                    chrome_trace = fetched_timeline.\
                        generate_chrome_trace_format()
                    with open(
                            'timeline/timeline_cpu_clone1_{0:03}.json'.format(
                                i), 'w') as f:
                        f.write(chrome_trace)
                else:
                    # Untraced step: just time the metric update.
                    t0 = time.time()
                    sess.run(
                        fetches=validation_metrics.update_op,
                        feed_dict=pipeline.validation_data,
                    )
                    duration = time.time() - t0
                if args.verbose:
                    print('step {:03}: {:.4f} sec'.format(i, duration))
                dtime.append(duration)
            except:
                # NOTE(review): bare except — presumably meant to stop on
                # tf.errors.OutOfRangeError at dataset exhaustion, but it
                # also swallows real failures; consider narrowing.
                i = i - 1
                break

        num_examples = (i) * args.batch_size
        if len(dtime) <= 1:
            print('Only one warm-up step was executed! Run more steps')
        else:
            # Drop the warm-up step before computing statistics.
            warmup = dtime[0]
            dtime = dtime[1:]
            t_sum = np.sum(dtime)
            t_mean = np.mean(dtime)
            t_median = np.median(dtime)
            t_min = np.min(dtime)
            t_max = np.max(dtime)
            t_std = np.std(dtime)
            """ print('''{:.0f} batches x {:.0f} bs = total {:.0f} images
            throughput[avg] = {:.1f} ips
            throughput[med] = {:.1f} ips
            latency[median] = {:>.4} ms
            latency[averge] = {:>.4} ms'''
                .format(
                i, args.batch_size, num_examples, num_examples/t_sum,
                args.batch_size/t_median, t_median*1000/args.batch_size,
                t_sum*1000/num_examples)) """
            print('''SqueezeNet Inference Summary:
            {:.0f} batches x {:.0f} bs = total {:.0f} images evaluated
            batch size = {}
            throughput[med] = {:.1f} image/sec
            latency[median] = {:>.4} ms
            '''.format(i, args.batch_size, num_examples, args.batch_size,
                       args.batch_size / t_median,
                       t_median * 1000 / args.batch_size))
        # Reinitialize dataset and metrics after going through all validation
        # examples
        sess.run(validation_init_op)

    def _train(args):
        '''Main Loop'''
        for train_step in range(starting_step, args.max_train_steps):
            sess.run(train_op, feed_dict=pipeline.training_data)

            '''Summary Hook'''
            if train_step % args.summary_interval == 0:
                # Extra forward pass to fetch accuracy + merged summaries.
                results = sess.run(fetches={
                    'accuracy': train_metrics.accuracy,
                    'summary': all_summaries
                }, feed_dict=pipeline.training_data)
                train_writer.add_summary(results['summary'], train_step)
                print('*** Step {:<5}'.format(train_step))
                print('Train: acc= {:>.4}%'.format(results['accuracy'] * 100))

            '''Checkpoint Hooks'''
            if train_step % args.checkpoint_interval == 0:
                saver.save(sess, save_path, global_step)
                # Training metrics restart after every checkpoint.
                sess.run(train_metrics.reset_op)

            '''Eval Hook'''
            if train_step % args.validation_interval == 0:
                # Drain the whole validation set; the iterator signals the
                # end by raising OutOfRangeError.
                while True:
                    try:
                        sess.run(fetches=validation_metrics.update_op,
                                 feed_dict=pipeline.validation_data)
                    except tf.errors.OutOfRangeError:
                        break
                results = sess.run({'accuracy': validation_metrics.accuracy})
                print('Validation: acc= {:>.4}%'.format(
                    results['accuracy'] * 100))
                # Hand-built summary so eval accuracy lands in the eval/ dir.
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag='accuracy',
                                     simple_value=results['accuracy']),
                ])
                eval_writer.add_summary(summary, train_step)
                # Reinitialize dataset and metrics
                sess.run(validation_init_op)

    if args.inference_only:
        _eval(args)
    else:
        _train(args)