def test_multi_device_inputs(self):
    test_driver = get_initialised_driver()
    graph = test_driver.create_graph(
        test_driver.app, test_driver.num_gpus, True)
    with self.test_session(graph=graph) as sess:
        GRAPH_CREATED.send(test_driver.app, iter_msg=None)
        SESS_STARTED.send(test_driver.app, iter_msg=None)
        for _ in range(2):
            sess.run(test_driver.app.gradient_op)
        s_0, s_1, s_2, s_3 = sess.run([
            tf.get_default_graph().get_tensor_by_name(
                'worker_0/feature_input:0'),
            tf.get_default_graph().get_tensor_by_name(
                'worker_1/feature_input:0'),
            tf.get_default_graph().get_tensor_by_name(
                'worker_2/feature_input:0'),
            tf.get_default_graph().get_tensor_by_name(
                'worker_3/feature_input:0')
        ])
        msg = 'same input data for different devices'
        self.assertGreater(np.sum(np.abs(s_0 - s_1)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(s_0 - s_2)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(s_0 - s_3)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(s_1 - s_2)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(s_1 - s_3)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(s_2 - s_3)), 0.0, msg)
        SESS_FINISHED.send(test_driver.app, iter_msg=None)
        test_driver.app.stop()
def test_run_vars(self):
    app_driver = get_initialised_driver()
    test_graph = app_driver.create_graph(app_driver.app, 1, True)
    test_tensor = test_graph.get_tensor_by_name("G/conv_bn_selu/conv_/w:0")
    train_eval_msgs = []
    test_vals = []

    def get_iter_msgs(_sender, **msg):
        """Captures iter_msg and model values for testing"""
        train_eval_msgs.append(msg['iter_msg'])
        test_vals.append(sess.run(test_tensor))
        print(msg['iter_msg'].to_console_string())

    ITER_FINISHED.connect(get_iter_msgs)

    with self.test_session(graph=test_graph) as sess:
        GRAPH_CREATED.send(app_driver.app, iter_msg=None)
        SESS_STARTED.send(app_driver.app, iter_msg=None)
        iterations = IterationMessageGenerator(
            initial_iter=0,
            final_iter=3,
            validation_every_n=2,
            validation_max_iter=1,
            is_training_action=True)
        app_driver.loop(app_driver.app, iterations())

        # Check sequence of iterations
        self.assertRegexpMatches(
            train_eval_msgs[0].to_console_string(), 'training')
        self.assertRegexpMatches(
            train_eval_msgs[1].to_console_string(), 'training')
        self.assertRegexpMatches(
            train_eval_msgs[2].to_console_string(), 'validation')
        self.assertRegexpMatches(
            train_eval_msgs[3].to_console_string(), 'training')

        # Check durations
        for iter_msg in train_eval_msgs:
            self.assertGreater(iter_msg.iter_duration, 0.0)

        # Check training changes the test tensor
        self.assertNotAlmostEqual(
            np.mean(np.abs(test_vals[0] - test_vals[1])), 0.0)
        self.assertNotAlmostEqual(
            np.mean(np.abs(test_vals[2] - test_vals[3])), 0.0)

        # Check validation doesn't change the test tensor
        self.assertAlmostEqual(
            np.mean(np.abs(test_vals[1] - test_vals[2])), 0.0)

        app_driver.app.stop()
    ITER_FINISHED.disconnect(get_iter_msgs)
def __init__(self,
             model_dir=None,
             initial_iter=0,
             tensorboard_every_n=0,
             **_unused):
    self.tensorboard_every_n = tensorboard_every_n
    # create a new summary subfolder unless fine-tuning (initial_iter > 0)
    self.summary_dir = get_latest_subfolder(
        os.path.join(model_dir, 'logs'),
        create_new=initial_iter == 0)
    self.writer_train = None
    self.writer_valid = None
    GRAPH_CREATED.connect(self.init_writer)
    ITER_STARTED.connect(self.read_tensorboard_op)
    ITER_FINISHED.connect(self.write_tensorboard)
def test_training_update(self):
    test_driver = get_initialised_driver()
    graph = test_driver.create_graph(test_driver.app, 1, True)
    with self.test_session(graph=graph) as sess:
        GRAPH_CREATED.send(test_driver.app, iter_msg=None)
        SESS_STARTED.send(test_driver.app, iter_msg=None)
        train_op = test_driver.app.gradient_op
        test_tensor = tf.get_default_graph().get_tensor_by_name(
            'G/conv_bn_selu/conv_/w:0')
        var_0 = sess.run(test_tensor)
        sess.run(train_op)
        var_1 = sess.run(test_tensor)
        abs_diff = np.sum(np.abs(var_0 - var_1))
        self.assertGreater(
            abs_diff, 0.0, 'train_op does not change model')
        SESS_FINISHED.send(test_driver.app, iter_msg=None)
        test_driver.app.stop()
def test_init(self):
    app_driver = get_initialised_driver()
    test_graph = app_driver.create_graph(app_driver.app, 1, True)
    app_driver.app.set_iteration_update = set_iteration_update
    app_driver.app.interpret_output = self.create_interpreter()
    app_driver.load_event_handlers([
        'niftynet.engine.handler_model.ModelRestorer',
        'niftynet.engine.handler_network_output.OutputInterpreter',
        'niftynet.engine.handler_sampler.SamplerThreading'])
    with self.test_session(graph=test_graph) as sess:
        GRAPH_CREATED.send(app_driver.app, iter_msg=None)
        SESS_STARTED.send(app_driver.app, iter_msg=None)
        iterator = IterationMessageGenerator(is_training_action=False)
        app_driver.loop(app_driver.app, iterator())
        app_driver.app.stop()
def create_graph(application, num_gpus=1, num_threads=1,
                 is_training_action=False):
    """
    Create a TF graph from the application's properties
    and the engine parameters.

    :return: a ``tf.Graph`` containing the samplers, the network
        replicated over the devices, and the merged output ops
    """
    graph = tf.Graph()
    main_device = device_string(num_gpus, 0, False, is_training_action)
    outputs_collector = OutputsCollector(n_devices=max(num_gpus, 1))
    gradients_collector = GradientsCollector(n_devices=max(num_gpus, 1))

    # start constructing the graph, handling training and inference cases
    with graph.as_default(), tf.device(main_device):

        # initialise samplers
        with tf.name_scope('Sampler'):
            application.initialise_sampler()
            for sampler in traverse_nested(application.get_sampler()):
                sampler.set_num_threads(num_threads)

        # initialise the network; the layers are connected later,
        # in the per-device context below
        application.initialise_network()
        application.add_validation_flag()

        # for data parallelism --
        # defining and collecting variables from multiple devices
        for gpu_id in range(0, max(num_gpus, 1)):
            worker_device = device_string(
                num_gpus, gpu_id, True, is_training_action)
            scope_string = 'worker_{}'.format(gpu_id)
            with tf.name_scope(scope_string), tf.device(worker_device):
                # set up the network on each of the devices
                application.connect_data_and_network(
                    outputs_collector, gradients_collector)

        with tf.name_scope('MergeOutputs'):
            outputs_collector.finalise_output_op()

        application.outputs_collector = outputs_collector
        application.gradients_collector = gradients_collector
        GRAPH_CREATED.send(application, iter_msg=None)
    return graph
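# The per-device tensor names used by the multi-GPU tests
# (e.g. 'worker_0/feature_input:0') come from wrapping each device's
# subgraph in tf.name_scope('worker_{}'.format(gpu_id)) inside
# create_graph. A minimal sketch, independent of NiftyNet and assuming
# TensorFlow 1.x graph-mode APIs (the helper name is illustrative),
# showing how the scope prefix ends up in the tensor names:
def _name_scope_sketch():
    graph = tf.Graph()
    with graph.as_default():
        for gpu_id in range(2):
            with tf.name_scope('worker_{}'.format(gpu_id)):
                # a placeholder named 'feature_input' inside the scope
                tf.placeholder(tf.float32, shape=[None, 8],
                               name='feature_input')
    # the tensors are retrievable by their scoped names
    print(graph.get_tensor_by_name('worker_0/feature_input:0'))
    print(graph.get_tensor_by_name('worker_1/feature_input:0'))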
def test_init(self):
    ITER_FINISHED.connect(self.iteration_listener)

    app_driver = get_initialised_driver()
    app_driver.load_event_handlers([
        'niftynet.engine.handler_model.ModelRestorer',
        'niftynet.engine.handler_console.ConsoleLogger',
        'niftynet.engine.handler_sampler.SamplerThreading'])
    graph = app_driver.create_graph(app_driver.app, 1, True)
    with self.test_session(graph=graph) as sess:
        GRAPH_CREATED.send(app_driver.app, iter_msg=None)
        SESS_STARTED.send(app_driver.app, iter_msg=None)
        msg = IterationMessage()
        msg.current_iter = 1
        app_driver.loop(app_driver.app, [msg])
        app_driver.app.stop()

    ITER_FINISHED.disconnect(self.iteration_listener)
def test_multi_device_gradients(self):
    test_driver = get_initialised_driver()
    graph = test_driver.create_graph(
        test_driver.app, test_driver.num_gpus, True)
    with self.test_session(graph=graph) as sess:
        GRAPH_CREATED.send(test_driver.app, iter_msg=None)
        SESS_STARTED.send(test_driver.app, iter_msg=None)
        for _ in range(2):
            sess.run(test_driver.app.gradient_op)
        g_0, g_1, g_2, g_3, g_ave = sess.run([
            tf.get_default_graph().get_tensor_by_name(
                'worker_0/ComputeGradients/gradients/AddN_5:0'),
            tf.get_default_graph().get_tensor_by_name(
                'worker_1/ComputeGradients/gradients/AddN_5:0'),
            tf.get_default_graph().get_tensor_by_name(
                'worker_2/ComputeGradients/gradients/AddN_5:0'),
            tf.get_default_graph().get_tensor_by_name(
                'worker_3/ComputeGradients/gradients/AddN_5:0'),
            tf.get_default_graph().get_tensor_by_name(
                'ApplyGradients/AveOverDevices:0')
        ])
        msg = 'same gradients for different devices'
        self.assertGreater(np.sum(np.abs(g_0 - g_1)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(g_0 - g_2)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(g_0 - g_3)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(g_1 - g_2)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(g_1 - g_3)), 0.0, msg)
        self.assertGreater(np.sum(np.abs(g_2 - g_3)), 0.0, msg)

        # the averaged gradient should match the numpy mean
        # of the per-device gradients
        g_array = np.concatenate([
            g_0.reshape((1, -1)),
            g_1.reshape((1, -1)),
            g_2.reshape((1, -1)),
            g_3.reshape((1, -1))], axis=0)
        g_ave = g_ave.reshape(-1)
        g_np_ave = np.mean(g_array, axis=0)
        self.assertAllClose(g_np_ave, g_ave)

        SESS_FINISHED.send(test_driver.app, iter_msg=None)
        test_driver.app.stop()
def __init__(self, is_training_action=False, **_unused):
    if not is_training_action:
        return
    GRAPH_CREATED.connect(self.make_gradients_op)
    ITER_STARTED.connect(self.add_gradients)
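# The handlers above register callbacks on engine events in their
# constructors and are never called directly by the driver; the driver
# only fires signals such as GRAPH_CREATED, ITER_STARTED and
# ITER_FINISHED with an ``iter_msg`` keyword argument. A minimal sketch
# of that publish/subscribe pattern, assuming blinker-style signals
# (the ToyHandler class and signal name here are illustrative, not part
# of NiftyNet):
from blinker import signal

TOY_ITER_STARTED = signal('toy_iter_started')

class ToyHandler(object):
    def __init__(self):
        # subscribe the bound method to the event
        TOY_ITER_STARTED.connect(self.on_iter_started)

    def on_iter_started(self, sender, **msg):
        # receivers take the sender plus keyword arguments, mirroring
        # ``def get_iter_msgs(_sender, **msg)`` in test_run_vars above
        print('iteration started by', sender, 'msg:', msg.get('iter_msg'))

# the driver side only needs to emit the signal
handler = ToyHandler()
TOY_ITER_STARTED.send('driver', iter_msg=None)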