def _keras_callback_on_batch_end(callback, batch, logs=None):
    """broadcast should be done after the first gradient step to ensure optimizer initialization."""
    if callback.broadcast_done:
        return

    if _tf_major_version == 2:
        if hasattr(callback.model, 'variables'):
            # Synchronize the model variables across workers.
            for v in callback.model.variables:
                _tf_assign(v, broadcast(v))

            # The optimizer may be wrapped; in that case the inner optimizer
            # holds the slot variables that also need to be broadcast.
            if hasattr(callback.model.optimizer, 'variables'):
                opt_variables = callback.model.optimizer.variables()
            else:
                opt_variables = callback.model.optimizer.optimizer.variables()

            for v in opt_variables:
                _tf_assign(v, broadcast(v))
        else:
            raise RuntimeError('No variables in %s' % callback.model)

    elif _tf_major_version == 1:
        # In TF1, run the broadcast op in the current Keras session.
        tf.keras.backend.get_session().run(BroadcastGlobalVariablesOp())

    callback.broadcast_done = True
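
For orientation, here is a minimal sketch of how a hook like this is usually attached to a Keras training run. The callback class and its name are hypothetical and not taken from the source; the only requirements implied by the hook above are a broadcast_done flag and delegation from on_batch_end.

import tensorflow as tf

class BroadcastCallback(tf.keras.callbacks.Callback):
    """Hypothetical callback that broadcasts state once training has started."""

    def __init__(self):
        super().__init__()
        # _keras_callback_on_batch_end checks this flag and sets it after the broadcast.
        self.broadcast_done = False

    def on_batch_end(self, batch, logs=None):
        _keras_callback_on_batch_end(self, batch, logs)

# Typical use: model.fit(x, y, callbacks=[BroadcastCallback()])
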
def broadcast_variables(variables):
    """A TensorFlow function that broadcasts global variables.

    This function is often used with ``tf.GradientTape`` or embedded as part of a training program.
    """
    for v in variables:
        _tf_assign(v, broadcast(v))
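
As a rough usage sketch of the pattern the docstring describes, the broadcast is typically run once, right after the first optimizer step in a custom ``tf.GradientTape`` loop, so that the optimizer slot variables exist before they are synchronized. The model, optimizer, and loss below are assumptions for illustration, not part of the source.

import tensorflow as tf

@tf.function
def training_step(model, optimizer, images, labels, first_step):
    with tf.GradientTape() as tape:
        logits = model(images, training=True)
        loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(
                labels, logits, from_logits=True))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # Broadcast once, after the first step, so the freshly created
    # optimizer slot variables are included.
    if first_step:
        broadcast_variables(model.variables)
        broadcast_variables(optimizer.variables())
    return loss
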
Example #3
    def apply_gradients(self, apply_grads_func, grads_and_vars, **kwargs):
        np, rank = current_cluster_size(), current_rank()
        # Pick a random peer to exchange model variables with at this step.
        target = get_random_peer(np, rank)
        gradients, variables = list(zip(*grads_and_vars))

        # On the first step, populate the local variable store that peers request from.
        init_store_op = tf.cond(tf.equal(self._step, 0),
                                lambda: self.init_store(variables), tf.no_op)
        with tf.control_dependencies([init_store_op]):
            # Request the chosen peer's copy of each model variable.
            other_peer_vars = self._build_request_ops(target, variables)

        # Save the current model into the local store so other peers can request it.
        save_model_op = self._build_save_op(variables)

        # Average the local model with the random peer's model.
        assign_ops = [
            _tf_assign(v, 0.5 * (v + other_v))
            for v, other_v in zip(variables, other_peer_vars)
        ]

        # We need to re-zip gradients and variables because grads_and_vars can only be unzipped once.
        new_grads_and_vars = zip(gradients, variables)
        apply_op = apply_grads_func(new_grads_and_vars, **kwargs)

        # The returned op depends on the peer-averaging assignments, the local
        # gradient update, and the model save, so all three run as part of it.
        with tf.control_dependencies(assign_ops):
            with tf.control_dependencies([apply_op]):
                with tf.control_dependencies([save_model_op]):
                    return tf.group(apply_op)
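
For context, here is a rough sketch of how an apply_gradients hook like this is typically plugged into a TF1-style optimizer wrapper. The class and attribute names are illustrative, not taken from the source.

import tensorflow as tf

class _AveragingOptimizerWrapper(tf.compat.v1.train.Optimizer):
    """Illustrative wrapper that delegates apply_gradients to an algorithm hook."""

    def __init__(self, optimizer, algo, name='AveragingOptimizerWrapper'):
        super().__init__(use_locking=False, name=name)
        self._optimizer = optimizer
        self._algo = algo  # object providing the apply_gradients hook shown above

    def compute_gradients(self, *args, **kwargs):
        return self._optimizer.compute_gradients(*args, **kwargs)

    def apply_gradients(self, grads_and_vars, **kwargs):
        return self._algo.apply_gradients(
            self._optimizer.apply_gradients, grads_and_vars, **kwargs)
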
Example #4
    def apply_gradients(self, apply_grads_func, grads_and_vars, **kwargs):
        # It is important to apply model averaging every iteration [2]
        gradients, variables = list(zip(*grads_and_vars))
        # Sum each variable across all workers, then divide by the worker count to average.
        sum_vars = group_all_reduce(variables)
        avg_vars = [g / self._num_workers for g in sum_vars]

        # TODO: Apply momentum to the averaged model [2]
        assign_ops = [
            _tf_assign(v, avg_v) for v, avg_v in zip(variables, avg_vars)
        ]

        # We need to re-zip gradients and variables because grads_and_vars can only be unzipped once.
        new_grads_and_vars = zip(gradients, variables)

        # We can overlap model averaging and local SGD [2].
        with tf.control_dependencies(assign_ops):
            return apply_grads_func(new_grads_and_vars, **kwargs)
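
To make the averaging step concrete, here is a tiny NumPy sketch of what the all-reduce sum followed by the division computes across workers. It is purely illustrative and does not use the library's collective ops.

import numpy as np

# Pretend each row is the same model variable as held by one of four workers.
worker_copies = np.array([
    [1.0, 2.0],
    [3.0, 4.0],
    [5.0, 6.0],
    [7.0, 8.0],
])

num_workers = worker_copies.shape[0]
summed = worker_copies.sum(axis=0)   # what the all-reduce sum leaves on every worker
averaged = summed / num_workers      # the value every worker assigns to its variable
print(averaged)                      # [4. 5.]
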