def test_multi_device_inputs(self):
    test_driver = get_initialised_driver()
    graph = test_driver.create_graph(
        test_driver.app, test_driver.num_gpus, True)
    with self.test_session(graph=graph) as sess:
        GRAPH_CREATED.send(test_driver.app, iter_msg=None)
        SESS_STARTED.send(test_driver.app, iter_msg=None)
        for _ in range(2):
            sess.run(test_driver.app.gradient_op)
        s_0, s_1, s_2, s_3 = sess.run([
            graph.get_tensor_by_name('worker_0/feature_input:0'),
            graph.get_tensor_by_name('worker_1/feature_input:0'),
            graph.get_tensor_by_name('worker_2/feature_input:0'),
            graph.get_tensor_by_name('worker_3/feature_input:0')
        ])
        # every device should receive a different batch of input data
        msg = 'same input data for different devices'
        self.assertGreater(np.sum(np.abs(s_0 - s_1)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(s_0 - s_2)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(s_0 - s_3)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(s_1 - s_2)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(s_1 - s_3)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(s_2 - s_3)), 0.0, msg)
        SESS_FINISHED.send(test_driver.app, iter_msg=None)
    test_driver.app.stop()
def run(self, application, graph=None):
    """
    Initialise a TF graph, connect data sampler and network within
    the graph context, run training loops or inference loops.

    :param application: a niftynet application
    :param graph: default base graph to run the application
    :return:
    """
    if graph is None:
        graph = ApplicationDriver.create_graph(
            application=application,
            num_gpus=self.num_gpus,
            num_threads=self.num_threads,
            is_training_action=self.is_training_action)

    start_time = time.time()
    loop_status = {'current_iter': self.initial_iter, 'normal_exit': False}
    with tf.Session(config=tf_config(), graph=graph):
        try:
            # broadcasting event of session started
            SESS_STARTED.send(application, iter_msg=None)

            # create an iteration message generator and
            # iteratively run the graph (the main engine loop)
            iteration_messages = self._generator(**vars(self))()
            ApplicationDriver.loop(
                application=application,
                iteration_messages=iteration_messages,
                loop_status=loop_status)
        except KeyboardInterrupt:
            tf.logging.warning('User cancelled application')
        except (tf.errors.OutOfRangeError, EOFError):
            if not loop_status.get('normal_exit', False):
                # reached the end of inference Dataset
                loop_status['normal_exit'] = True
        except RuntimeError:
            import sys
            import traceback
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(
                exc_type, exc_value, exc_traceback, file=sys.stdout)
        finally:
            tf.logging.info('cleaning up...')
            # broadcasting session finished event
            iter_msg = IterationMessage()
            iter_msg.current_iter = loop_status.get('current_iter', -1)
            SESS_FINISHED.send(application, iter_msg=iter_msg)

    application.stop()
    if not loop_status.get('normal_exit', False):
        # loop didn't finish normally
        tf.logging.warning('stopped early, incomplete iterations.')
    tf.logging.info('%s stopped (time in seconds %.2f).',
                    type(application).__name__, (time.time() - start_time))
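# The run() method above broadcasts SESS_STARTED before entering the main
# loop and SESS_FINISHED in the `finally` clause, so connected handlers
# observe both events even when the loop exits early. A minimal sketch of
# a handler relying on that contract; `LifecycleLogger` is a hypothetical
# name for illustration, not part of NiftyNet (SESS_STARTED and
# SESS_FINISHED are the blinker signals imported from
# niftynet.engine.signal):
class LifecycleLogger(object):
    """Log the session lifecycle events broadcast by the driver."""

    def __init__(self, **_unused):
        SESS_STARTED.connect(self.on_start)
        SESS_FINISHED.connect(self.on_finish)

    def on_start(self, sender, **_msg):
        tf.logging.info('session started for %s', type(sender).__name__)

    def on_finish(self, sender, **msg):
        iter_msg = msg.get('iter_msg')
        last_iter = iter_msg.current_iter if iter_msg else -1
        tf.logging.info('session finished at iteration %d', last_iter)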
def __init__(self, model_dir, initial_iter=0,
             is_training_action=True, **_unused):
    self.initial_iter = initial_iter
    self.file_name_prefix = make_model_name(model_dir)

    # randomly initialise or restore the model variables
    if is_training_action and initial_iter == 0:
        SESS_STARTED.connect(self.rand_init_model)
    else:
        SESS_STARTED.connect(self.restore_model)
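# The branch above encodes the rule: a fresh training run (initial_iter
# of 0) gets random initialisation, anything else (resumed training,
# inference, evaluation) restores from a checkpoint. The same rule as a
# pure function, so it can be unit-tested in isolation;
# `choose_init_mode` is a hypothetical helper for illustration, not part
# of the handler:
def choose_init_mode(is_training_action, initial_iter):
    """Return 'random_init' for a fresh training run, else 'restore'."""
    if is_training_action and initial_iter == 0:
        return 'random_init'
    return 'restore'

assert choose_init_mode(True, 0) == 'random_init'   # new training run
assert choose_init_mode(True, 100) == 'restore'     # resume from iter 100
assert choose_init_mode(False, 0) == 'restore'      # inference/evaluation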
def test_run_vars(self):
    app_driver = get_initialised_driver()
    test_graph = app_driver.create_graph(app_driver.app, 1, True)
    test_tensor = test_graph.get_tensor_by_name("G/conv_bn_selu/conv_/w:0")
    train_eval_msgs = []
    test_vals = []

    def get_iter_msgs(_sender, **msg):
        """Capture iter_msg and model values for testing."""
        train_eval_msgs.append(msg['iter_msg'])
        test_vals.append(sess.run(test_tensor))
        print(msg['iter_msg'].to_console_string())

    ITER_FINISHED.connect(get_iter_msgs)

    with self.test_session(graph=test_graph) as sess:
        GRAPH_CREATED.send(app_driver.app, iter_msg=None)
        SESS_STARTED.send(app_driver.app, iter_msg=None)
        iterations = IterationMessageGenerator(initial_iter=0,
                                               final_iter=3,
                                               validation_every_n=2,
                                               validation_max_iter=1,
                                               is_training_action=True)
        app_driver.loop(app_driver.app, iterations())

        # check the sequence of training/validation iterations
        self.assertRegexpMatches(train_eval_msgs[0].to_console_string(),
                                 'training')
        self.assertRegexpMatches(train_eval_msgs[1].to_console_string(),
                                 'training')
        self.assertRegexpMatches(train_eval_msgs[2].to_console_string(),
                                 'validation')
        self.assertRegexpMatches(train_eval_msgs[3].to_console_string(),
                                 'training')

        # check durations
        for iter_msg in train_eval_msgs:
            self.assertGreater(iter_msg.iter_duration, 0.0)

        # check that training iterations change the test tensor
        self.assertNotAlmostEqual(
            np.mean(np.abs(test_vals[0] - test_vals[1])), 0.0)
        self.assertNotAlmostEqual(
            np.mean(np.abs(test_vals[2] - test_vals[3])), 0.0)

        # check that the validation iteration doesn't change it
        self.assertAlmostEqual(
            np.mean(np.abs(test_vals[1] - test_vals[2])), 0.0)

        app_driver.app.stop()
    ITER_FINISHED.disconnect(get_iter_msgs)
def test_training_update(self):
    test_driver = get_initialised_driver()
    graph = test_driver.create_graph(test_driver.app, 1, True)
    with self.test_session(graph=graph) as sess:
        SESS_STARTED.send(test_driver.app, iter_msg=None)
        train_op = test_driver.app.gradient_op
        test_tensor = graph.get_tensor_by_name('G/conv_bn_selu/conv_/w:0')
        var_0 = sess.run(test_tensor)
        sess.run(train_op)
        var_1 = sess.run(test_tensor)
        # one training step should change the model weights
        abs_diff = np.sum(np.abs(var_0 - var_1))
        self.assertGreater(abs_diff, 0.0, 'train_op does not change model')
        SESS_FINISHED.send(test_driver.app, iter_msg=None)
    test_driver.app.stop()
def test_init(self):
    ITER_FINISHED.connect(self.iteration_listener)
    app_driver = get_initialised_driver()
    app_driver.load_event_handlers([
        'niftynet.engine.handler_model.ModelRestorer',
        'niftynet.engine.handler_console.ConsoleLogger',
        'niftynet.engine.handler_sampler.SamplerThreading'
    ])
    graph = app_driver.create_graph(app_driver.app, 1, True)
    with self.cached_session(graph=graph):
        SESS_STARTED.send(app_driver.app, iter_msg=None)
        msg = IterationMessage()
        msg.current_iter = 1
        app_driver.loop(app_driver.app, [msg])
        app_driver.app.stop()
    ITER_FINISHED.disconnect(self.iteration_listener)
def test_init(self):
    app_driver = get_initialised_driver()
    test_graph = app_driver.create_graph(app_driver.app, 1, True)
    app_driver.app.set_iteration_update = set_iteration_update
    app_driver.app.interpret_output = self.create_interpreter()
    app_driver.load_event_handlers([
        'niftynet.engine.handler_model.ModelRestorer',
        'niftynet.engine.handler_network_output.OutputInterpreter',
        'niftynet.engine.handler_sampler.SamplerThreading'
    ])
    with self.test_session(graph=test_graph):
        SESS_STARTED.send(app_driver.app, iter_msg=None)
        iterator = IterationMessageGenerator(is_training_action=False)
        app_driver.loop(app_driver.app, iterator())
        app_driver.app.stop()
def test_multi_device_multi_optimiser_gradients(self):
    test_driver = get_initialised_driver(
        application='tests.toy_application.ToyApplicationMultOpti')
    graph = test_driver.create_graph(
        test_driver.app, test_driver.num_gpus, True)
    with self.test_session(graph=graph) as sess:
        SESS_STARTED.send(test_driver.app, iter_msg=None)
        for _ in range(2):
            sess.run(test_driver.app.gradient_op)

        # query discriminator gradient samples to check
        dis_0, dis_1, dis_2, dis_3, dis_ave = sess.run([
            graph.get_tensor_by_name(
                'worker_0/ComputeGradientsD/gradients/AddN_5:0'),
            graph.get_tensor_by_name(
                'worker_1/ComputeGradientsD/gradients/AddN_5:0'),
            graph.get_tensor_by_name(
                'worker_2/ComputeGradientsD/gradients/AddN_5:0'),
            graph.get_tensor_by_name(
                'worker_3/ComputeGradientsD/gradients/AddN_5:0'),
            graph.get_tensor_by_name('ApplyGradients/AveOverDevices:0')
        ])

        # query generator gradient samples to check
        gen_0, gen_1, gen_2, gen_3, gen_ave = sess.run([
            graph.get_tensor_by_name(
                'worker_0/ComputeGradientsG/gradients/worker_0/tinynet/'
                'G/conv/conv_/conv/ExpandDims_1_grad/Reshape:0'),
            graph.get_tensor_by_name(
                'worker_1/ComputeGradientsG/gradients/worker_1/tinynet/'
                'G/conv/conv_/conv/ExpandDims_1_grad/Reshape:0'),
            graph.get_tensor_by_name(
                'worker_2/ComputeGradientsG/gradients/worker_2/tinynet/'
                'G/conv/conv_/conv/ExpandDims_1_grad/Reshape:0'),
            graph.get_tensor_by_name(
                'worker_3/ComputeGradientsG/gradients/worker_3/tinynet/'
                'G/conv/conv_/conv/ExpandDims_1_grad/Reshape:0'),
            graph.get_tensor_by_name('ApplyGradients/AveOverDevices_14:0')
        ])
        self.check_gradients(gen_0, gen_1, gen_2, gen_3, gen_ave)
        self.check_gradients(dis_0, dis_1, dis_2, dis_3, dis_ave)
        SESS_FINISHED.send(test_driver.app, iter_msg=None)
    test_driver.app.stop()
def __init__(self, model_dir,
             save_every_n=0, max_checkpoints=1,
             is_training_action=True, **_unused):
    self.save_every_n = save_every_n
    self.max_checkpoints = max_checkpoints
    self.file_name_prefix = make_model_name(model_dir)
    self.saver = None

    # initialise the saver after the graph is finalised
    SESS_STARTED.connect(self.init_saver)
    # save the training model at a positive frequency
    if self.save_every_n > 0:
        ITER_FINISHED.connect(self.save_model_interval)
    # always save the final training model before exiting
    if is_training_action:
        SESS_FINISHED.connect(self.save_model)
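# The connections above give a checkpoint every save_every_n iterations
# plus a guaranteed final save for training actions. A sketch of the
# periodic trigger condition that save_model_interval presumably applies;
# `should_save` is a hypothetical helper for illustration, not the
# handler's actual implementation:
def should_save(current_iter, save_every_n):
    """Return True when a periodic checkpoint is due at this iteration."""
    return (save_every_n > 0
            and current_iter > 0
            and current_iter % save_every_n == 0)

assert should_save(10, 5)        # iteration 10, saving every 5: save
assert not should_save(11, 5)    # not a multiple of 5: skip
assert not should_save(10, 0)    # periodic saving disabled: never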
def test_multi_device_gradients(self):
    test_driver = get_initialised_driver()
    graph = test_driver.create_graph(
        test_driver.app, test_driver.num_gpus, True)
    with self.test_session(graph=graph) as sess:
        GRAPH_CREATED.send(test_driver.app, iter_msg=None)
        SESS_STARTED.send(test_driver.app, iter_msg=None)
        for _ in range(2):
            sess.run(test_driver.app.gradient_op)
        g_0, g_1, g_2, g_3, g_ave = sess.run([
            graph.get_tensor_by_name(
                'worker_0/ComputeGradients/gradients/AddN_5:0'),
            graph.get_tensor_by_name(
                'worker_1/ComputeGradients/gradients/AddN_5:0'),
            graph.get_tensor_by_name(
                'worker_2/ComputeGradients/gradients/AddN_5:0'),
            graph.get_tensor_by_name(
                'worker_3/ComputeGradients/gradients/AddN_5:0'),
            graph.get_tensor_by_name('ApplyGradients/AveOverDevices:0')
        ])
        # every device should compute different gradients
        msg = 'same gradients for different devices'
        self.assertGreater(np.sum(np.abs(g_0 - g_1)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(g_0 - g_2)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(g_0 - g_3)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(g_1 - g_2)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(g_1 - g_3)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(g_2 - g_3)), 0.0, msg)

        # the device-averaged gradient should match the numpy mean
        g_array = np.concatenate([
            g_0.reshape((1, -1)),
            g_1.reshape((1, -1)),
            g_2.reshape((1, -1)),
            g_3.reshape((1, -1))
        ], axis=0)
        g_ave = g_ave.reshape(-1)
        g_np_ave = np.mean(g_array, axis=0)
        self.assertAllClose(g_np_ave, g_ave)
        SESS_FINISHED.send(test_driver.app, iter_msg=None)
    test_driver.app.stop()
def test_multi_device_gradients(self):
    test_driver = get_initialised_driver()
    graph = test_driver.create_graph(
        test_driver.app, test_driver.num_gpus, True)
    with self.test_session(graph=graph) as sess:
        SESS_STARTED.send(test_driver.app, iter_msg=None)
        for _ in range(2):
            sess.run(test_driver.app.gradient_op)
        g_0, g_1, g_2, g_3, g_ave = sess.run([
            graph.get_tensor_by_name(
                'worker_0/ComputeGradients/gradients/AddN_5:0'),
            graph.get_tensor_by_name(
                'worker_1/ComputeGradients/gradients/AddN_5:0'),
            graph.get_tensor_by_name(
                'worker_2/ComputeGradients/gradients/AddN_5:0'),
            graph.get_tensor_by_name(
                'worker_3/ComputeGradients/gradients/AddN_5:0'),
            graph.get_tensor_by_name('ApplyGradients/AveOverDevices:0')
        ])
        self.check_gradients(g_0, g_1, g_2, g_3, g_ave)
        SESS_FINISHED.send(test_driver.app, iter_msg=None)
    test_driver.app.stop()
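# Both the multi-optimiser and the single-optimiser tests above call a
# check_gradients helper that is not shown in this section. A plausible
# reconstruction, based on the inline assertions in the first
# test_multi_device_gradients: each device must produce distinct
# gradients, and the AveOverDevices tensor must equal their elementwise
# mean. This sketch is an assumption, not the repository's actual helper:
import itertools

def check_gradients(g_0, g_1, g_2, g_3, g_ave, atol=1e-6):
    """Devices disagree pairwise; g_ave is their elementwise mean."""
    grads = [g_0, g_1, g_2, g_3]
    for g_a, g_b in itertools.combinations(grads, 2):
        assert np.sum(np.abs(g_a - g_b)) > 0.0, \
            'same gradients for different devices'
    stacked = np.stack([g.reshape(-1) for g in grads])
    assert np.allclose(np.mean(stacked, axis=0), g_ave.reshape(-1),
                       atol=atol)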