# Assumes module-level imports: `import tensorflow as tf` and a Keras backend
# imported as `K` (e.g. `from tensorflow.python.keras import backend as K`).
def _byteps_average_metrics_in_place(self, logs):
    logs = logs or {}
    reduced_logs = {}
    import byteps.tensorflow as bps
    if self._allreduce_ranks <= 1.:
        self._allreduce_ranks = float(bps.size())
    # Reduce every metric among workers. Sort metrics by name
    # to ensure consistent order.
    for metric, value in sorted(logs.items()):
        from tensorflow.python.eager import context
        if context.executing_eagerly():
            with tf.device(self._device):
                reduced_logs[metric] = bps.push_pull(
                    K.constant(value, name=metric)).numpy()
        else:
            if metric not in self._m_vars:
                with tf.name_scope('MetricAverageCallback') as scope:
                    var = tf.Variable(value, name=metric)
                    K.get_session().run(var.initializer)
                    self._m_vars[metric] = var
                    self._allreduce_ops[metric] = bps.push_pull(
                        var, scope, device_dense=self._device)
            else:
                K.set_value(self._m_vars[metric], value)
            reduced_logs[metric] = K.get_session().run(
                self._allreduce_ops[metric])
    # Override the reduced values back into logs dictionary
    # for other callbacks to use.
    for metric, value in reduced_logs.items():
        logs[metric] = value / self._allreduce_ranks
def get_gradients(self, loss, params):
    """
    Compute gradients of all trainable variables.

    See Optimizer.get_gradients() for more info.

    In DistributedOptimizer, get_gradients() is overridden to also
    push_pull the gradients before returning them.
    """
    gradients = super(self.__class__, self).get_gradients(loss, params)
    if bps.size() > 1:
        averaged_gradients = []
        with tf.name_scope(self._name + "_Push_Pull") as scope:
            for grad in gradients:
                if grad is not None:
                    if self._sparse_as_dense and \
                            isinstance(grad, tf.IndexedSlices):
                        grad = tf.convert_to_tensor(grad)
                    avg_grad = bps.push_pull(
                        grad, scope,
                        device_dense=self._device_dense,
                        device_sparse=self._device_sparse,
                        compression=self._compression)
                    averaged_gradients.append(avg_grad)
                else:
                    averaged_gradients.append(None)
        return averaged_gradients
    else:
        return gradients
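# Usage sketch (illustrative only, not library code): how the get_gradients()
# override above is typically exercised. The entry points `bps.init()`,
# `bps.size()` and `bps.DistributedOptimizer` follow BytePS's Horovod-style
# Keras API; verify the exact names against the installed
# byteps.tensorflow.keras module before relying on them.
def _example_compile_with_distributed_optimizer(model):
    import tensorflow as tf
    import byteps.tensorflow.keras as bps
    bps.init()
    # Scale the learning rate by the number of workers, then wrap the
    # optimizer so every gradient is push_pull-averaged before each update.
    opt = tf.keras.optimizers.SGD(0.01 * bps.size())
    opt = bps.DistributedOptimizer(opt)
    model.compile(optimizer=opt, loss='categorical_crossentropy')
    return model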
def _push_pull(self, gradients):
    # Average the computed gradients across all workers via BytePS push_pull;
    # on a single worker the gradients are returned unchanged.
    self._aggregated_gradients = True
    if bps.size() > 1:
        averaged_gradients = []
        with tf.name_scope(self._name + "_Push_Pull") as scope:
            for grad in gradients:
                if grad is not None:
                    if self._sparse_as_dense and \
                            isinstance(grad, tf.IndexedSlices):
                        grad = tf.convert_to_tensor(grad)
                    avg_grad = bps.push_pull(
                        grad, scope,
                        device_dense=self._device_dense,
                        device_sparse=self._device_sparse,
                        compression=self._compression)
                    averaged_gradients.append(avg_grad)
                else:
                    averaged_gradients.append(None)
        return averaged_gradients
    else:
        return gradients
# Assumes module-level imports: `import tensorflow as tf` and
# `import byteps.tensorflow as bps`.
def _average_metrics_in_place(self, logs):
    logs = logs or {}
    reduced_logs = {}
    # Reduce every metric among workers. Sort metrics by name
    # to ensure consistent order.
    for metric, value in sorted(logs.items()):
        if bps._executing_eagerly():
            with tf.device(self.device):
                reduced_logs[metric] = bps.push_pull(
                    self.backend.constant(value, name=metric)).numpy()
        else:
            if metric not in self.variables:
                self.variables[metric], self.push_pull_ops[metric] = \
                    self._make_variable(metric, value)
            else:
                self.backend.set_value(self.variables[metric], value)
            reduced_logs[metric] = \
                self.backend.get_session().run(self.push_pull_ops[metric])
    # Override the reduced values back into logs dictionary
    # for other callbacks to use.
    for metric, value in reduced_logs.items():
        logs[metric] = value
def _push_pull(self, grads):
    # Variant used by the distributed loss-scale optimizer: average gradients
    # across all workers via BytePS push_pull; unchanged on a single worker.
    self._aggregated_gradients = True
    import byteps.tensorflow as bps
    if bps.size() > 1:
        averaged_gradients = []
        with tf.name_scope(
                "DistributedLossScaleOptimizer_Push_Pull") as scope:
            for grad in grads:
                if grad is not None:
                    if self._sparse_as_dense and isinstance(
                            grad, tf.IndexedSlices):
                        grad = tf.convert_to_tensor(grad)
                    avg_grad = bps.push_pull(
                        grad, scope,
                        device_dense=self._device_dense,
                        device_sparse=self._device_sparse,
                        compression=self._compression)
                    averaged_gradients.append(avg_grad)
                else:
                    averaged_gradients.append(None)
        return averaged_gradients
    else:
        return grads
def push_pull(backend, value, name, average):
    # Reduce `value` across workers; `_eval` (defined elsewhere in this
    # module) evaluates the resulting tensor with the given backend.
    return _eval(backend, bps.push_pull(tf.constant(value, name=name),
                                        average=average))
def _make_variable(self, metric, value):
    with tf.name_scope('MetricAverageCallback') as scope:
        var = tf.Variable(value, name=metric)
        self.backend.get_session().run(var.initializer)
        push_pull_op = bps.push_pull(var, scope, device_dense=self.device)
        return var, push_pull_op
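# Wiring sketch (illustrative, not BytePS-provided): _average_metrics_in_place()
# and _make_variable() above are meant to live on a Keras callback that averages
# logged metrics across workers after every epoch. The attribute names
# (backend, device, variables, push_pull_ops) are the ones those helpers expect;
# the class below is an assumption about that surrounding callback, included
# only to show how the pieces fit together.
class _ExampleMetricAverageCallback(tf.keras.callbacks.Callback):
    # Borrow the module-level helpers defined above as methods.
    _make_variable = _make_variable
    _average_metrics_in_place = _average_metrics_in_place

    def __init__(self, device=''):
        super().__init__()
        self.backend = tf.keras.backend
        self.device = device
        self.variables = {}
        self.push_pull_ops = {}

    def on_epoch_end(self, epoch, logs=None):
        # Replace each metric with its cross-worker average so every rank
        # logs identical values.
        self._average_metrics_in_place(logs)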
def push_pull(backend, value, name, average):
    push_pull_op = bps.push_pull(tf.constant(value, name=name), average=average)
    return backend.get_session().run(push_pull_op)
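# Usage sketch (illustrative): averaging a plain Python scalar across all
# workers with the graph-mode push_pull() helper above. This assumes TF1-style
# session execution (backend.get_session()) and that `average=True` makes
# BytePS return the mean rather than the sum; the wrapper function name is
# hypothetical.
def _example_average_scalar(local_value):
    from tensorflow.python.keras import backend as K
    # Every worker contributes its local value; the result is the same
    # averaged value on every rank.
    return push_pull(K, local_value, name='avg_value', average=True)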