コード例 #1
0
    def testCreateOnecloneWithPS(self):
        """Optimizes one clone with one PS task and checks device placement."""
        with tf.Graph().as_default():
            tf.set_random_seed(0)
            inputs = tf.constant(self._inputs, dtype=tf.float32)
            labels = tf.constant(self._labels, dtype=tf.float32)
            config = model_deploy.DeploymentConfig(num_clones=1,
                                                   num_ps_tasks=1)

            # The graph must hold no variables until the clones are built.
            self.assertEqual(framework.get_variables(), [])
            clones = model_deploy.create_clones(
                config, BatchNormClassifier, (inputs, labels))
            self.assertEqual(len(framework.get_variables()), 5)
            self.assertEqual(
                len(tf.get_collection(tf.GraphKeys.UPDATE_OPS)), 2)

            sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, sgd)
            self.assertEqual(len(grads_and_vars),
                             len(tf.trainable_variables()))
            self.assertEqual(total_loss.op.name, 'total_loss')

            # Gradients are expected on the worker GPU, variables on the PS.
            for grad, var in grads_and_vars:
                self.assertDeviceEqual(grad.device,
                                       '/job:worker/device:GPU:0')
                self.assertDeviceEqual(var.device, '/job:ps/task:0/CPU:0')
コード例 #2
0
    def testCreateOnecloneWithPS(self):
        """Builds one clone with one PS task and checks clone/variable devices."""
        with tf.Graph().as_default():
            tf.set_random_seed(0)
            inputs = tf.constant(self._inputs, dtype=tf.float32)
            labels = tf.constant(self._labels, dtype=tf.float32)
            config = model_deploy.DeploymentConfig(num_clones=1,
                                                   num_ps_tasks=1)

            # The graph must hold no variables until the clones are built.
            self.assertEqual(framework.get_variables(), [])
            clones = model_deploy.create_clones(
                config, BatchNormClassifier, (inputs, labels))
            self.assertEqual(len(clones), 1)

            only_clone = clones[0]
            self.assertEqual(only_clone.outputs.op.name,
                             'BatchNormClassifier/fully_connected/Sigmoid')
            self.assertDeviceEqual(only_clone.device,
                                   '/job:worker/device:GPU:0')
            self.assertEqual(only_clone.scope, '')

            variables = framework.get_variables()
            self.assertEqual(len(variables), 5)
            for variable in variables:
                self.assertDeviceEqual(variable.device,
                                       '/job:ps/task:0/CPU:0')
                self.assertDeviceEqual(variable.device,
                                       variable.value().device)
コード例 #3
0
    def testCreateMulticloneWithPS(self):
        """Builds two clones with two PS tasks and checks their placement."""
        with tf.Graph().as_default():
            tf.set_random_seed(0)
            inputs = tf.constant(self._inputs, dtype=tf.float32)
            labels = tf.constant(self._labels, dtype=tf.float32)
            config = model_deploy.DeploymentConfig(num_clones=2,
                                                   num_ps_tasks=2)

            # The graph must hold no variables until the clones are built.
            self.assertEqual(framework.get_variables(), [])
            clones = model_deploy.create_clones(
                config, BatchNormClassifier, (inputs, labels))

            variables = framework.get_variables()
            self.assertEqual(len(variables), 5)
            # The expected PS task alternates with the variable's index.
            for index, variable in enumerate(variables):
                self.assertDeviceEqual(
                    variable.device,
                    '/job:ps/task:%d/device:CPU:0' % (index % 2))
                self.assertDeviceEqual(variable.device,
                                       variable.value().device)

            self.assertEqual(len(clones), 2)
            for index, clone in enumerate(clones):
                self.assertEqual(
                    clone.outputs.op.name,
                    'clone_%d/BatchNormClassifier/fully_connected/Sigmoid'
                    % index)
                self.assertEqual(clone.scope, 'clone_%d/' % index)
                self.assertDeviceEqual(clone.device,
                                       '/job:worker/device:GPU:%d' % index)
コード例 #4
0
    def testCreateMulticlone(self):
        """Builds four clones without PS tasks and checks scopes and devices."""
        with tf.Graph().as_default():
            tf.set_random_seed(0)
            inputs = tf.constant(self._inputs, dtype=tf.float32)
            labels = tf.constant(self._labels, dtype=tf.float32)
            clone_count = 4
            config = model_deploy.DeploymentConfig(num_clones=clone_count)

            # The graph must hold no variables until the clones are built.
            self.assertEqual(framework.get_variables(), [])
            clones = model_deploy.create_clones(
                config, BatchNormClassifier, (inputs, labels))

            variables = framework.get_variables()
            self.assertEqual(len(variables), 5)
            for variable in variables:
                self.assertDeviceEqual(variable.device, 'CPU:0')
                self.assertDeviceEqual(variable.value().device, 'CPU:0')

            self.assertEqual(len(clones), clone_count)
            for index, clone in enumerate(clones):
                self.assertEqual(
                    clone.outputs.op.name,
                    'clone_%d/BatchNormClassifier/fully_connected/Sigmoid'
                    % index)
                # Each clone scope is expected to hold its own two update ops.
                scoped_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                                   clone.scope)
                self.assertEqual(len(scoped_updates), 2)
                self.assertEqual(clone.scope, 'clone_%d/' % index)
                self.assertDeviceEqual(clone.device, 'GPU:%d' % index)
コード例 #5
0
    def testCreateLogisticClassifier(self):
        """Builds a single LogisticClassifier clone and checks what it creates."""
        with tf.Graph().as_default():
            tf.set_random_seed(0)
            inputs = tf.constant(self._inputs, dtype=tf.float32)
            labels = tf.constant(self._labels, dtype=tf.float32)
            config = model_deploy.DeploymentConfig(num_clones=1)

            # The graph must hold no variables until the clones are built.
            self.assertEqual(framework.get_variables(), [])
            clones = model_deploy.create_clones(
                config, LogisticClassifier, (inputs, labels))
            only_clone = clones[0]

            variables = framework.get_variables()
            self.assertEqual(len(variables), 2)
            for variable in variables:
                self.assertDeviceEqual(variable.device, 'CPU:0')
                self.assertDeviceEqual(variable.value().device, 'CPU:0')

            self.assertEqual(only_clone.outputs.op.name,
                             'LogisticClassifier/fully_connected/Sigmoid')
            self.assertEqual(only_clone.scope, '')
            self.assertDeviceEqual(only_clone.device, 'GPU:0')
            self.assertEqual(len(tf.losses.get_losses()), 1)
            # This model is expected to register no update ops.
            self.assertEqual(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS), [])
コード例 #6
0
    def testCPUonly(self):
        """With clone_on_cpu=True every device getter should report CPU:0."""
        config = model_deploy.DeploymentConfig(clone_on_cpu=True)

        self.assertEqual(config.caching_device(), None)
        self.assertDeviceEqual(config.clone_device(0), 'CPU:0')
        self.assertEqual(config.clone_scope(0), '')
        for device_getter in (config.optimizer_device,
                              config.inputs_device,
                              config.variables_device):
            self.assertDeviceEqual(device_getter(), 'CPU:0')
コード例 #7
0
    def testDefaults(self):
        """Checks the devices and scope reported by a default DeploymentConfig."""
        config = model_deploy.DeploymentConfig()

        self.assertEqual(framework.get_variables(), [])
        self.assertEqual(config.caching_device(), None)
        self.assertDeviceEqual(config.clone_device(0), 'GPU:0')
        self.assertEqual(config.clone_scope(0), '')
        for device_getter in (config.optimizer_device,
                              config.inputs_device,
                              config.variables_device):
            self.assertDeviceEqual(device_getter(), 'CPU:0')
コード例 #8
0
    def testMultiGPU(self):
        """Checks per-clone devices and scopes for a two-clone configuration."""
        config = model_deploy.DeploymentConfig(num_clones=2)

        self.assertEqual(config.caching_device(), None)
        expected_devices = ('GPU:0', 'GPU:1')
        for index, device in enumerate(expected_devices):
            self.assertDeviceEqual(config.clone_device(index), device)
        expected_scopes = ('clone_0', 'clone_1')
        for index, scope in enumerate(expected_scopes):
            self.assertEqual(config.clone_scope(index), scope)
        for device_getter in (config.optimizer_device,
                              config.inputs_device,
                              config.variables_device):
            self.assertDeviceEqual(device_getter(), 'CPU:0')
コード例 #9
0
    def testReplicasPS(self):
        """Checks worker-side devices with two replicas and two PS tasks."""
        config = model_deploy.DeploymentConfig(num_replicas=2,
                                               num_ps_tasks=2)
        worker_cpu = '/job:worker/device:CPU:0'

        self.assertDeviceEqual(config.clone_device(0),
                               '/job:worker/device:GPU:0')
        self.assertEqual(config.clone_scope(0), '')
        self.assertDeviceEqual(config.optimizer_device(), worker_cpu)
        self.assertDeviceEqual(config.inputs_device(), worker_cpu)
コード例 #10
0
    def testLocalTrainOp(self):
        """Deploys two CPU clones, trains ten steps and checks loss and stats."""
        with tf.Graph().as_default():
            tf.set_random_seed(0)
            inputs = tf.constant(self._inputs, dtype=tf.float32)
            labels = tf.constant(self._labels, dtype=tf.float32)
            config = model_deploy.DeploymentConfig(num_clones=2,
                                                   clone_on_cpu=True)
            sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)

            # The graph must hold no variables until the model is deployed.
            self.assertEqual(framework.get_variables(), [])
            deployed = model_deploy.deploy(config,
                                           BatchNormClassifier,
                                           (inputs, labels),
                                           optimizer=sgd)

            self.assertEqual(
                len(tf.get_collection(tf.GraphKeys.UPDATE_OPS)), 4)
            self.assertEqual(len(deployed.clones), 2)
            self.assertEqual(deployed.total_loss.op.name, 'total_loss')
            self.assertEqual(deployed.summary_op.op.name,
                             'summary_op/summary_op')
            self.assertEqual(deployed.train_op.op.name, 'train_op')

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                batch_stats = [
                    framework.get_variables_by_name('moving_mean')[0],
                    framework.get_variables_by_name('moving_variance')[0],
                ]

                # Record the loss and moving statistics before training.
                loss_before = sess.run(deployed.total_loss)
                mean_before, variance_before = sess.run(batch_stats)
                self.assertAllClose(mean_before, [0.0, 0.0, 0.0, 0.0])
                self.assertAllClose(variance_before, [1.0, 1.0, 1.0, 1.0])

                step = 0
                while step < 10:
                    sess.run(deployed.train_op)
                    step += 1

                loss_after = sess.run(deployed.total_loss)
                self.assertLess(loss_after, loss_before / 5.0)

                mean_after, variance_after = sess.run(batch_stats)
                expected_mean = np.array([0.125, 0.25, 0.375, 0.25])
                expected_var = self._addBesselsCorrection(
                    16, np.array([0.109375, 0.1875, 0.234375, 0.1875]))
                self.assertAllClose(mean_after, expected_mean)
                self.assertAllClose(variance_after, expected_var)
コード例 #11
0
    def testMultiGPUPS(self):
        """Checks devices and scopes for two clones backed by one PS task."""
        config = model_deploy.DeploymentConfig(num_clones=2,
                                               num_ps_tasks=1)
        worker_cpu = '/job:worker/device:CPU:0'

        # caching_device() returns a callable here; applied to an op it
        # should yield the empty device string.
        self.assertEqual(config.caching_device()(tf.no_op()), '')
        expected_devices = ('/job:worker/device:GPU:0',
                            '/job:worker/device:GPU:1')
        for index, device in enumerate(expected_devices):
            self.assertDeviceEqual(config.clone_device(index), device)
        expected_scopes = ('clone_0', 'clone_1')
        for index, scope in enumerate(expected_scopes):
            self.assertEqual(config.clone_scope(index), scope)
        self.assertDeviceEqual(config.optimizer_device(), worker_cpu)
        self.assertDeviceEqual(config.inputs_device(), worker_cpu)
コード例 #12
0
    def testNoSummariesOnGPUForEvals(self):
        """Deploys without an optimizer and checks summary inputs sit on CPU."""
        with tf.Graph().as_default():
            config = model_deploy.DeploymentConfig(num_clones=2)

            def ModelFn():
                # Clone function: one fully_connected layer whose weights
                # carry an L2 regularizer loss.
                constant_inputs = tf.constant(
                    1.0, shape=(10, 20), dtype=tf.float32)
                layers.fully_connected(
                    constant_inputs, 30,
                    weights_regularizer=layers.l2_regularizer(0.001))

            # Deploying without an optimizer makes this an eval-style model.
            deployed = model_deploy.deploy(config, ModelFn)

            # The summary op should have at least one input, and every input
            # should be placed on the CPU.
            summary_inputs = deployed.summary_op.op.inputs
            self.assertTrue(summary_inputs)
            for summary_input in summary_inputs:
                self.assertEqual('/device:CPU:0', summary_input.device)
コード例 #13
0
    def testVariablesPS(self):
        """Checks how variables and ops are placed with two PS tasks."""
        config = model_deploy.DeploymentConfig(num_ps_tasks=2)

        with tf.device(config.variables_device()):
            first_var = tf.Variable(0)
            second_var = tf.Variable(0)
            plain_op = tf.no_op()
            cached_var = framework.variable(
                'a', [], caching_device=config.caching_device())

        # Consecutive variables are expected on alternating PS tasks.
        self.assertDeviceEqual(first_var.device,
                               '/job:ps/task:0/device:CPU:0')
        self.assertDeviceEqual(first_var.device, first_var.value().device)
        self.assertDeviceEqual(second_var.device,
                               '/job:ps/task:1/device:CPU:0')
        self.assertDeviceEqual(second_var.device, second_var.value().device)
        # A non-variable op gets no explicit device.
        self.assertDeviceEqual(plain_op.device, '')
        # The cached variable lives on the PS but its value has no device.
        self.assertDeviceEqual(cached_var.device,
                               '/job:ps/task:0/device:CPU:0')
        self.assertDeviceEqual(cached_var.value().device, '')
コード例 #14
0
    def testPS(self):
        """Checks worker and variable placement for one clone and one PS task."""
        config = model_deploy.DeploymentConfig(num_clones=1,
                                               num_ps_tasks=1)
        worker_cpu = '/job:worker/device:CPU:0'
        ps_cpu = '/job:ps/task:0/device:CPU:0'

        self.assertDeviceEqual(config.clone_device(0),
                               '/job:worker/device:GPU:0')
        self.assertEqual(config.clone_scope(0), '')
        self.assertDeviceEqual(config.optimizer_device(), worker_cpu)
        self.assertDeviceEqual(config.inputs_device(), worker_cpu)

        with tf.device(config.variables_device()):
            first_var = tf.Variable(0)
            second_var = tf.Variable(0)
            plain_op = tf.no_op()
            cached_var = framework.variable(
                'a', [], caching_device=config.caching_device())

        # With a single PS task both variables are expected on task 0.
        self.assertDeviceEqual(first_var.device, ps_cpu)
        self.assertDeviceEqual(first_var.device, first_var.value().device)
        self.assertDeviceEqual(second_var.device, ps_cpu)
        self.assertDeviceEqual(second_var.device, second_var.value().device)
        # A non-variable op gets no explicit device.
        self.assertDeviceEqual(plain_op.device, '')
        # The cached variable lives on the PS but its value has no device.
        self.assertDeviceEqual(cached_var.device, ps_cpu)
        self.assertDeviceEqual(cached_var.value().device, '')
コード例 #15
0
def train(create_tensor_dict_fn_list, create_model_fn, train_config, master,
          task, num_clones, worker_replicas, clone_on_cpu, ps_tasks,
          worker_job_name, is_chief, train_dir):
    """Training function for models.

    Builds input queues, model clones, the optimizer, summaries and session
    hooks inside a fresh graph, then hands the resulting train tensor to
    `tf.contrib.training.train`.

    Args:
      create_tensor_dict_fn_list: a sequence of functions, each of which
        creates a tensor input dictionary. One input queue is built per
        function, sized by `train_config.batch_size[i] // num_clones`.
      create_model_fn: a function that creates a DetectionModel and generates
        losses.
      train_config: a train_pb2.TrainConfig protobuf.
      master: BNS name of the TensorFlow master to use.
      task: The task id of this training instance.
      num_clones: The number of clones to run per machine.
      worker_replicas: The number of work replicas to train with.
      clone_on_cpu: True if clones should be forced to run on CPU.
      ps_tasks: Number of parameter server tasks.
      worker_job_name: Name of the worker job.
      is_chief: Whether this replica is the chief replica.
      train_dir: Directory to write checkpoints and training summaries to.
    """
    data_augmentation_options = [
        preprocessor_builder.build(step)
        for step in train_config.data_augmentation_options
    ]

    with tf.Graph().as_default():
        # Build a configuration specifying multi-GPU and multi-replicas.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=num_clones,
            clone_on_cpu=clone_on_cpu,
            replica_id=task,
            num_replicas=worker_replicas,
            num_ps_tasks=ps_tasks,
            worker_job_name=worker_job_name)

        # Place the global step on the device storing the variables.
        with tf.device(deploy_config.variables_device()):
            global_step = tf.train.create_global_step()

        # One input queue per tensor-dict function; the per-queue batch size
        # is split evenly across the clones.
        with tf.device(deploy_config.inputs_device()), \
             tf.name_scope('Input'):
            input_queue_list = []
            for i, create_tensor_dict_fn in enumerate(
                    create_tensor_dict_fn_list):
                input_queue_list.append(
                    _create_input_queue(
                        train_config.batch_size[i] // num_clones,
                        create_tensor_dict_fn,
                        train_config.batch_queue_capacity,
                        train_config.num_batch_queue_threads,
                        train_config.prefetch_queue_capacity,
                        data_augmentation_options))

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        global_summaries = set()

        model_fn = functools.partial(_create_losses,
                                     create_model_fn=create_model_fn)
        clones = model_deploy.create_clones(deploy_config, model_fn,
                                            [input_queue_list])
        first_clone_scope = clones[0].scope

        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by model_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        with tf.device(deploy_config.optimizer_device()), \
             tf.name_scope('Optimizer'):
            training_optimizer = optimizer_builder.build(
                train_config.optimizer, global_summaries)

        sync_optimizer = None
        if train_config.sync_replicas:
            training_optimizer = tf.train.SyncReplicasOptimizer(
                training_optimizer,
                replicas_to_aggregate=train_config.replicas_to_aggregate,
                total_num_replicas=train_config.worker_replicas)
            sync_optimizer = training_optimizer

        # Create ops required to initialize the model from a given checkpoint.
        init_fn = None
        if train_config.fine_tune_checkpoint:
            # NOTE(review): `detection_model` is not defined anywhere in this
            # function; presumably it is a module-level name -- confirm, since
            # otherwise this branch raises NameError.
            var_map = detection_model.restore_map(
                from_detection_checkpoint=train_config.
                from_detection_checkpoint)
            available_var_map = variables_helper.get_variables_available_in_checkpoint(
                var_map, train_config.fine_tune_checkpoint)
            init_saver = tf.train.Saver(available_var_map)

            def initializer_fn(sess):
                init_saver.restore(sess, train_config.fine_tune_checkpoint)

            init_fn = initializer_fn

        with tf.device(deploy_config.optimizer_device()), \
             tf.variable_scope('OptimizeClones'):
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, training_optimizer, regularization_losses=None)
            total_loss = tf.check_numerics(total_loss,
                                           'LossTensor is inf or nan.')

            # Optionally multiply bias gradients by train_config.bias_grad_multiplier.
            if train_config.bias_grad_multiplier:
                biases_regex_list = [r'.*bias(?:es)?', r'.*beta']
                grads_and_vars = variables_helper.multiply_gradients_matching_regex(
                    grads_and_vars,
                    biases_regex_list,
                    multiplier=train_config.bias_grad_multiplier)

            # Optionally freeze some layers by setting their gradients to be zero.
            if train_config.freeze_variables:
                grads_and_vars = variables_helper.freeze_gradients_matching_regex(
                    grads_and_vars, train_config.freeze_variables)

            # Optionally clip gradients
            if train_config.gradient_clipping_by_norm > 0:
                with tf.name_scope('clip_grads'):
                    grads_and_vars = tf.contrib.training.clip_gradient_norms(
                        grads_and_vars, train_config.gradient_clipping_by_norm)

            # Create gradient updates.
            grad_updates = training_optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)
            update_ops.append(grad_updates)

            # The train tensor yields the loss only after all updates ran.
            update_op = tf.group(*update_ops)
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add summaries.
        for (grad, var) in grads_and_vars:
            var_name = var.op.name
            grad_name = 'grad/' + var_name
            global_summaries.add(tf.summary.histogram(grad_name, grad))
            global_summaries.add(tf.summary.histogram(var_name, var))
        for loss_tensor in tf.losses.get_losses():
            global_summaries.add(
                tf.summary.scalar(loss_tensor.op.name, loss_tensor))
        global_summaries.add(
            tf.summary.scalar('TotalLoss', tf.losses.get_total_loss()))

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))
        summaries |= global_summaries

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # Soft placement allows placing on CPU ops without GPU implementation.
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)

        # Save checkpoints regularly.
        keep_checkpoint_every_n_hours = train_config.keep_checkpoint_every_n_hours
        saver = tf.train.Saver(
            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)

        scaffold = tf.train.Scaffold(init_fn=init_fn,
                                     summary_op=summary_op,
                                     saver=saver)

        hooks = []
        # StopAtStepHook rejects a missing step count, so it is installed
        # only when a limit is configured; without one, training runs until
        # it is stopped externally.
        if train_config.num_steps:
            hooks.append(
                tf.train.StopAtStepHook(num_steps=train_config.num_steps))
        hooks.append(
            profile_session_run_hooks.ProfileAtStepHook(
                at_step=200, checkpoint_dir=train_dir))
        # SyncReplicasOptimizer needs its session-run hook (created after
        # apply_gradients) to initialise its token queue; without it,
        # synchronous training cannot make progress.
        if sync_optimizer is not None:
            hooks.append(sync_optimizer.make_session_run_hook(is_chief))

        tf.contrib.training.train(
            train_tensor,
            train_dir,
            master=master,
            is_chief=is_chief,
            scaffold=scaffold,
            hooks=hooks,
            chief_only_hooks=None,
            save_checkpoint_secs=train_config.save_checkpoint_secs,
            save_summaries_steps=train_config.save_summaries_steps,
            config=session_config)