Example #1
    def _byteps_average_metrics_in_place(self, logs):
        logs = logs or {}
        reduced_logs = {}
        import byteps.tensorflow as bps

        if self._allreduce_ranks <= 1.:
            self._allreduce_ranks = float(bps.size())
        # Reduce every metric among workers. Sort metrics by name
        # to ensure consistent order.
        for metric, value in sorted(logs.items()):
            from tensorflow.python.eager import context
            if context.executing_eagerly():
                with tf.device(self._device):
                    reduced_logs[metric] = bps.push_pull(
                        K.constant(value, name=metric)).numpy()
            else:
                if metric not in self._m_vars:
                    with tf.name_scope('MetricAverageCallback') as scope:
                        var = tf.Variable(value, name=metric)
                        K.get_session().run(var.initializer)
                        self._m_vars[metric] = var
                        self._allreduce_ops[metric] = bps.push_pull(
                            var, scope, device_dense=self._device)
                else:
                    K.set_value(self._m_vars[metric], value)
                reduced_logs[metric] = K.get_session().run(
                    self._allreduce_ops[metric])

        # Override the reduced values back into logs dictionary
        # for other callbacks to use.
        for metric, value in reduced_logs.items():
            logs[metric] = value / self._allreduce_ranks
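Note that this variant divides the push_pulled value by the number of ranks before writing it back, which implies the collective returns a sum along this code path (compare Example #4 below, where the reduced value is written into logs as-is). A tiny numeric sketch of that final averaging step, with made-up per-worker values:

# Hypothetical numbers, for illustration only: four workers each report a loss.
per_worker_losses = [0.9, 1.1, 1.0, 1.2]
allreduce_ranks = float(len(per_worker_losses))
summed = sum(per_worker_losses)         # what the push_pulled sum would hold
averaged = summed / allreduce_ranks     # 1.05, the averaged metric kept in logs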
Example #2
 def get_gradients(self, loss, params):
     """
     Compute gradients of all trainable variables.
     See Optimizer.get_gradients() for more info.
     In DistributedOptimizer, get_gradients() is overridden to also
     push_pull the gradients before returning them.
     """
     gradients = super(self.__class__, self).get_gradients(loss, params)
     if bps.size() > 1:
         averaged_gradients = []
         with tf.name_scope(self._name + "_Push_Pull") as scope:
             for grad in gradients:
                 if grad is not None:
                     if self._sparse_as_dense and \
                             isinstance(grad, tf.IndexedSlices):
                         grad = tf.convert_to_tensor(grad)
                     avg_grad = bps.push_pull(
                         grad,
                         scope,
                         device_dense=self._device_dense,
                         device_sparse=self._device_sparse,
                         compression=self._compression)
                     averaged_gradients.append(avg_grad)
                 else:
                     averaged_gradients.append(None)
             return averaged_gradients
     else:
         return gradients
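For context, the overridden get_gradients() above only takes effect once an existing optimizer has been wrapped. A minimal usage sketch, assuming a Horovod-style entry point; bps.init() and bps.DistributedOptimizer() are assumptions here, since only bps.size() and bps.push_pull() appear in the example above:

# Sketch only: wrap a Keras optimizer so gradients are push_pulled across workers.
import tensorflow as tf
import byteps.tensorflow.keras as bps

bps.init()                                                 # assumed initializer
base_opt = tf.keras.optimizers.SGD(learning_rate=0.01 * bps.size())  # scale LR by worker count
opt = bps.DistributedOptimizer(base_opt)                   # get_gradients() now push_pulls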
Example #3
 def _push_pull(self, gradients):
     self._aggregated_gradients = True
     if bps.size() > 1:
         averaged_gradients = []
         with tf.name_scope(self._name + "_Push_Pull") as scope:
             for grad in gradients:
                 if grad is not None:
                     if self._sparse_as_dense and \
                             isinstance(grad, tf.IndexedSlices):
                         grad = tf.convert_to_tensor(grad)
                     avg_grad = bps.push_pull(
                         grad,
                         scope,
                         device_dense=self._device_dense,
                         device_sparse=self._device_sparse,
                         compression=self._compression)
                     averaged_gradients.append(avg_grad)
                 else:
                     averaged_gradients.append(None)
             return averaged_gradients
     else:
         return gradients
Example #4
 def _average_metrics_in_place(self, logs):
     logs = logs or {}
     reduced_logs = {}
     # Reduce every metric among workers. Sort metrics by name
     # to ensure consistent order.
     for metric, value in sorted(logs.items()):
         if bps._executing_eagerly():
             with tf.device(self.device):
                 reduced_logs[metric] = \
                     bps.push_pull(self.backend.constant(value, name=metric)).numpy()
         else:
             if metric not in self.variables:
                 self.variables[metric], self.push_pull_ops[metric] = \
                     self._make_variable(metric, value)
             else:
                 self.backend.set_value(self.variables[metric], value)
             reduced_logs[metric] = \
                 self.backend.get_session().run(self.push_pull_ops[metric])
     # Override the reduced values back into logs dictionary
     # for other callbacks to use.
     for metric, value in reduced_logs.items():
         logs[metric] = value
Example #5
 def _push_pull(self, grads):
     self._aggregated_gradients = True
     import byteps.tensorflow as bps
     if bps.size() > 1:
         averaged_gradients = []
         with tf.name_scope(
                 "DistributedLossScaleOptimizer_Push_Pull") as scope:
             for grad in grads:
                 if grad is not None:
                     if self._sparse_as_dense and isinstance(
                             grad, tf.IndexedSlices):
                         grad = tf.convert_to_tensor(grad)
                     avg_grad = bps.push_pull(
                         grad,
                         scope,
                         device_dense=self._device_dense,
                         device_sparse=self._device_sparse,
                         compression=self._compression)
                     averaged_gradients.append(avg_grad)
                 else:
                     averaged_gradients.append(None)
             return averaged_gradients
     else:
         return grads
Example #6
def push_pull(backend, value, name, average):
    return _eval(backend,
                 bps.push_pull(tf.constant(value, name=name), average=average))
Example #7
 def _make_variable(self, metric, value):
     with tf.name_scope('MetricAverageCallback') as scope:
         var = tf.Variable(value, name=metric)
         self.backend.get_session().run(var.initializer)
         push_pull_op = bps.push_pull(var, scope, device_dense=self.device)
         return var, push_pull_op
Example #8
def push_pull(backend, value, name, average):
    push_pull_op = bps.push_pull(tf.constant(value, name=name),
                                 average=average)
    return backend.get_session().run(push_pull_op)
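Examples #6 and #8 do the same thing with different evaluation strategies: #8 always runs the op through the backend's session, which only works in graph mode, while #6 delegates to an _eval helper that can also handle eager execution. A sketch of what such a helper might look like; the name _eval appears in Example #6, but this body is an assumption, not taken verbatim from BytePS:

import tensorflow as tf

def _eval(backend, op_or_result):
    # Assumed helper: in eager mode the tensor already holds a concrete value,
    # so return it directly; in graph mode evaluate it through the session.
    if tf.executing_eagerly():
        return op_or_result
    return backend.get_session().run(op_or_result)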