Example 1
 def test_multi_device_inputs(self):
     """Check that each GPU worker tower receives a distinct input batch.

     Runs two iterations of the gradient op and asserts that the four
     ``worker_*/feature_input`` tensors are pairwise different.
     """
     test_driver = get_initialised_driver()
     graph = test_driver.create_graph(test_driver.app, test_driver.num_gpus,
                                      True)
     with self.test_session(graph=graph) as sess:
         GRAPH_CREATED.send(test_driver.app, iter_msg=None)
         SESS_STARTED.send(test_driver.app, iter_msg=None)
         for _ in range(2):
             sess.run(test_driver.app.gradient_op)
             samples = sess.run([
                 tf.get_default_graph().get_tensor_by_name(
                     'worker_%d/feature_input:0' % idx)
                 for idx in range(4)
             ])
             msg = 'same input data for different devices'
             # every pair of device inputs must differ
             for a in range(len(samples)):
                 for b in range(a + 1, len(samples)):
                     self.assertGreater(
                         np.sum(np.abs(samples[a] - samples[b])), 0.0, msg)
         # BUG FIX: kwarg was misspelled 'itermsg'; signal receivers expect
         # 'iter_msg' (consistent with the SESS_STARTED send above)
         SESS_FINISHED.send(test_driver.app, iter_msg=None)
         test_driver.app.stop()
Example 2
    def run(self, application, graph=None):
        """
        Initialise a TF graph, connect data sampler and network within
        the graph context, run training loops or inference loops.

        Broadcasts SESS_STARTED before the main loop and SESS_FINISHED
        (in a ``finally`` block) after it, then stops the application.

        :param application: a niftynet application
        :param graph: default base graph to run the application;
            built via ``ApplicationDriver.create_graph`` when not provided
        :return: None
        """
        if graph is None:
            graph = ApplicationDriver.create_graph(
                application=application,
                num_gpus=self.num_gpus,
                num_threads=self.num_threads,
                is_training_action=self.is_training_action)

        start_time = time.time()
        # mutated in place by ApplicationDriver.loop, so the final iteration
        # number and exit mode survive any exception raised below
        loop_status = {'current_iter': self.initial_iter, 'normal_exit': False}

        with tf.Session(config=tf_config(), graph=graph):
            try:
                # broadcasting event of session started
                SESS_STARTED.send(application, iter_msg=None)

                # create a iteration message generator and
                # iteratively run the graph (the main engine loop)
                iteration_messages = self._generator(**vars(self))()
                ApplicationDriver.loop(application=application,
                                       iteration_messages=iteration_messages,
                                       loop_status=loop_status)

            except KeyboardInterrupt:
                tf.logging.warning('User cancelled application')
            except (tf.errors.OutOfRangeError, EOFError):
                # input Dataset exhausted: treated as a normal termination
                if not loop_status.get('normal_exit', False):
                    # reached the end of inference Dataset
                    loop_status['normal_exit'] = True
            except RuntimeError:
                # print the traceback but fall through to cleanup below
                import sys
                import traceback
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_exception(exc_type,
                                          exc_value,
                                          exc_traceback,
                                          file=sys.stdout)
            finally:
                tf.logging.info('cleaning up...')
                # broadcasting session finished event -- sent even on error
                # so receivers (e.g. model savers) can still clean up
                iter_msg = IterationMessage()
                iter_msg.current_iter = loop_status.get('current_iter', -1)
                SESS_FINISHED.send(application, iter_msg=iter_msg)

        application.stop()
        if not loop_status.get('normal_exit', False):
            # loop didn't finish normally
            tf.logging.warning('stopped early, incomplete iterations.')
        tf.logging.info("%s stopped (time in second %.2f).",
                        type(application).__name__, (time.time() - start_time))
Example 3
    def test_training_update(self):
        """A single run of the gradient op must change the model weights."""
        test_driver = get_initialised_driver()
        graph = test_driver.create_graph(test_driver.app, 1, True)
        with self.test_session(graph=graph) as sess:
            SESS_STARTED.send(test_driver.app, iter_msg=None)

            train_op = test_driver.app.gradient_op
            test_tensor = graph.get_tensor_by_name('G/conv_bn_selu/conv_/w:0')
            var_0 = sess.run(test_tensor)
            sess.run(train_op)
            var_1 = sess.run(test_tensor)
            # sum of absolute differences (L1) -- renamed from the misleading
            # 'square_diff'; nothing here is squared
            abs_diff = np.sum(np.abs(var_0 - var_1))
            self.assertGreater(abs_diff, 0.0,
                               'train_op does not change model')
            # BUG FIX: 'itermsg' -> 'iter_msg' (consistent with SESS_STARTED)
            SESS_FINISHED.send(test_driver.app, iter_msg=None)
            test_driver.app.stop()
Example 4
    def test_multi_device_multi_optimiser_gradients(self):
        """Check per-worker G and D gradients and their device averages.

        Uses a toy application with separate optimisers for the generator
        (``ComputeGradientsG``) and discriminator (``ComputeGradientsD``).
        """
        test_driver = get_initialised_driver(
            application='tests.toy_application.ToyApplicationMultOpti')
        graph = test_driver.create_graph(test_driver.app, test_driver.num_gpus,
                                         True)
        with self.test_session(graph=graph) as sess:
            SESS_STARTED.send(test_driver.app, iter_msg=None)
            for _ in range(2):
                sess.run(test_driver.app.gradient_op)
                # BUG FIX: the original comments were swapped -- these are
                # the DISCRIMINATOR (ComputeGradientsD) gradient samples
                dis_0, dis_1, dis_2, dis_3, dis_ave = sess.run([
                    graph.get_tensor_by_name(
                        'worker_0/ComputeGradientsD/gradients/AddN_5:0'),
                    graph.get_tensor_by_name(
                        'worker_1/ComputeGradientsD/gradients/AddN_5:0'),
                    graph.get_tensor_by_name(
                        'worker_2/ComputeGradientsD/gradients/AddN_5:0'),
                    graph.get_tensor_by_name(
                        'worker_3/ComputeGradientsD/gradients/AddN_5:0'),
                    graph.get_tensor_by_name('ApplyGradients/AveOverDevices:0')
                ])

                # GENERATOR (ComputeGradientsG) gradient samples
                gen_0, gen_1, gen_2, gen_3, gen_ave = sess.run([
                    graph.get_tensor_by_name(
                        'worker_0/ComputeGradientsG/gradients/worker_0/tinynet/G/conv/conv_/conv/ExpandDims_1_grad/Reshape:0'
                    ),
                    graph.get_tensor_by_name(
                        'worker_1/ComputeGradientsG/gradients/worker_1/tinynet/G/conv/conv_/conv/ExpandDims_1_grad/Reshape:0'
                    ),
                    graph.get_tensor_by_name(
                        'worker_2/ComputeGradientsG/gradients/worker_2/tinynet/G/conv/conv_/conv/ExpandDims_1_grad/Reshape:0'
                    ),
                    graph.get_tensor_by_name(
                        'worker_3/ComputeGradientsG/gradients/worker_3/tinynet/G/conv/conv_/conv/ExpandDims_1_grad/Reshape:0'
                    ),
                    graph.get_tensor_by_name(
                        'ApplyGradients/AveOverDevices_14:0')
                ])
                self.check_gradients(gen_0, gen_1, gen_2, gen_3, gen_ave)
                self.check_gradients(dis_0, dis_1, dis_2, dis_3, dis_ave)
            # BUG FIX: 'itermsg' -> 'iter_msg' (consistent with SESS_STARTED)
            SESS_FINISHED.send(test_driver.app, iter_msg=None)
            test_driver.app.stop()
Example 5
    def __init__(self,
                 model_dir,
                 save_every_n=0,
                 max_checkpoints=1,
                 is_training_action=True,
                 **_unused):
        """Register checkpoint-saving handlers on the engine signals.

        :param model_dir: directory used to build the checkpoint name prefix
        :param save_every_n: save every n iterations when positive
        :param max_checkpoints: maximum number of checkpoints to keep
        :param is_training_action: when True, always save once more
            before the session exits
        """
        self.save_every_n = save_every_n
        self.max_checkpoints = max_checkpoints
        self.file_name_prefix = make_model_name(model_dir)
        # created lazily once the graph is finalised (see init_saver)
        self.saver = None

        # initialise the saver only after the session has started
        SESS_STARTED.connect(self.init_saver)
        if save_every_n > 0:
            # periodic checkpointing during training
            ITER_FINISHED.connect(self.save_model_interval)
        if is_training_action:
            # final checkpoint before the session is torn down
            SESS_FINISHED.connect(self.save_model)
Example 6
 def test_multi_device_gradients(self):
     """Gradients must differ across workers and average correctly.

     Asserts the four per-worker gradient tensors are pairwise different
     and that 'ApplyGradients/AveOverDevices' equals their numpy mean.
     """
     test_driver = get_initialised_driver()
     graph = test_driver.create_graph(test_driver.app, test_driver.num_gpus,
                                      True)
     with self.test_session(graph=graph) as sess:
         GRAPH_CREATED.send(test_driver.app, iter_msg=None)
         SESS_STARTED.send(test_driver.app, iter_msg=None)
         for _ in range(2):
             sess.run(test_driver.app.gradient_op)
             fetched = sess.run([
                 tf.get_default_graph().get_tensor_by_name(
                     'worker_%d/ComputeGradients/gradients/AddN_5:0' % idx)
                 for idx in range(4)
             ] + [
                 tf.get_default_graph().get_tensor_by_name(
                     'ApplyGradients/AveOverDevices:0')
             ])
             grads, g_ave = fetched[:4], fetched[4]
             msg = 'same gradients for different devices'
             # every pair of per-device gradients must differ
             for a in range(4):
                 for b in range(a + 1, 4):
                     self.assertGreater(
                         np.sum(np.abs(grads[a] - grads[b])), 0.0, msg)
             # the device-averaged gradient must equal the numpy mean
             g_array = np.stack([g.reshape(-1) for g in grads], axis=0)
             self.assertAllClose(np.mean(g_array, axis=0), g_ave.reshape(-1))
         # BUG FIX: 'itermsg' -> 'iter_msg' (consistent with SESS_STARTED)
         SESS_FINISHED.send(test_driver.app, iter_msg=None)
         test_driver.app.stop()
Example 7
 def test_multi_device_gradients(self):
     """Per-worker gradients and their average pass ``check_gradients``."""
     test_driver = get_initialised_driver()
     graph = test_driver.create_graph(test_driver.app, test_driver.num_gpus,
                                      True)
     with self.test_session(graph=graph) as sess:
         SESS_STARTED.send(test_driver.app, iter_msg=None)
         for _ in range(2):
             sess.run(test_driver.app.gradient_op)
             g_0, g_1, g_2, g_3, g_ave = sess.run([
                 graph.get_tensor_by_name(
                     'worker_0/ComputeGradients/gradients/AddN_5:0'),
                 graph.get_tensor_by_name(
                     'worker_1/ComputeGradients/gradients/AddN_5:0'),
                 graph.get_tensor_by_name(
                     'worker_2/ComputeGradients/gradients/AddN_5:0'),
                 graph.get_tensor_by_name(
                     'worker_3/ComputeGradients/gradients/AddN_5:0'),
                 graph.get_tensor_by_name('ApplyGradients/AveOverDevices:0')
             ])
             self.check_gradients(g_0, g_1, g_2, g_3, g_ave)
         # BUG FIX: 'itermsg' -> 'iter_msg' (consistent with SESS_STARTED)
         SESS_FINISHED.send(test_driver.app, iter_msg=None)
         test_driver.app.stop()
Example 8
 def __init__(self, **_unused):
     """Register the handler that stops sampler threads at session end."""
     # NOTE(review): the start hook is deliberately left disabled below --
     # sampler threads are presumably started elsewhere; confirm before
     # re-enabling.
     # SESS_STARTED.connect(self.start_sampler_threads)
     SESS_FINISHED.connect(self.stop_sampler_threads)