def __init__(self, **kwargs):
    """Initialize the distributed Keras optimizer wrapper.

    Builds the shared allreduce function and, when more than one backward
    pass is accumulated per step, a local gradient-aggregation helper
    matching the current execution mode (eager vs. graph).

    NOTE(review): `name`, `device_dense`, `device_sparse`, `compression`,
    `sparse_as_dense`, `op`, `gradient_predivide_factor`, `num_groups`,
    `backward_passes_per_step` and `average_aggregated_gradients` are
    closure variables from an enclosing factory not visible in this chunk —
    confirm against the surrounding file.
    """
    self._name = name or "Distributed%s" % self.__class__.__base__.__name__
    self._aggregated_gradients = False
    # One allreduce function, shared with the aggregation helper below.
    self._allreduce_grads = hvd._make_allreduce_grads_fn(
        self._name, device_dense, device_sparse, compression,
        sparse_as_dense, op, gradient_predivide_factor, num_groups)

    self._agg_helper = None
    if backward_passes_per_step > 1:
        # Keyword arguments common to both aggregation helpers.
        helper_kwargs = dict(
            backward_passes_per_step=backward_passes_per_step,
            allreduce_func=self._allreduce_grads,
            sparse_as_dense=sparse_as_dense,
            average_aggregated_gradients=average_aggregated_gradients,
        )
        if hvd._executing_eagerly():
            self._agg_helper = LocalGradientAggregationHelperEager(
                **helper_kwargs)
        else:
            self._agg_helper = LocalGradientAggregationHelper(
                rank=rank(),
                optimizer_type=LocalGradientAggregationHelper._OPTIMIZER_TYPE_KERAS,
                **helper_kwargs)

    # NOTE(review): `super(self.__class__, ...)` is intentional here for
    # dynamically synthesized wrapper classes, but it would recurse if this
    # class were subclassed further — confirm against the class factory.
    super(self.__class__, self).__init__(**kwargs)
def __init__(self, optimizer, name=None, use_locking=False, device_dense='',
             device_sparse='', compression=Compression.none,
             sparse_as_dense=False, op=Average, gradient_predivide_factor=1.0,
             backward_passes_per_step=1, average_aggregated_gradients=False,
             groups=None):
    """Wrap a legacy (TF1-style) optimizer with distributed allreduce.

    Stores the wrapped optimizer, builds the allreduce function, and — when
    ``backward_passes_per_step > 1`` — sets up graph-mode local gradient
    aggregation (eager execution is rejected for the legacy path).
    """
    if name is None:
        name = "Distributed{}".format(type(optimizer).__name__)
    super(_DistributedOptimizer, self).__init__(name=name,
                                                use_locking=use_locking)

    self._optimizer = optimizer
    self._allreduce_grads = _make_allreduce_grads_fn(
        name, device_dense, device_sparse, compression, sparse_as_dense, op,
        gradient_predivide_factor, groups)

    self._agg_helper = None
    if backward_passes_per_step <= 1:
        return

    # Local aggregation is only implemented for graph mode on this path.
    if _executing_eagerly():
        raise ValueError(
            "backward_passes_per_step > 1 is not yet supported "
            "for _LegacyOptimizer with eager execution."
        )
    self._agg_helper = LocalGradientAggregationHelper(
        backward_passes_per_step=backward_passes_per_step,
        allreduce_func=self._allreduce_grads,
        sparse_as_dense=sparse_as_dense,
        average_aggregated_gradients=average_aggregated_gradients,
        rank=rank(),
        optimizer_type=LocalGradientAggregationHelper._OPTIMIZER_TYPE_LEGACY,
    )
def _bcast(obj):
    # Session-based (TF1) broadcast: the root serializes `obj` and announces
    # its byte length; the other ranks receive the length, allocate a
    # matching uint8 buffer, receive the payload, and deserialize it.
    #
    # NOTE(review): `root_rank`, `session`, `bcast_size`, `bcast_data`,
    # `sz` and `t` are closure variables from the enclosing function —
    # confirm against the surrounding file.
    if rank() == root_rank:
        payload = io.BytesIO()
        cloudpickle.dump(obj, payload)
        t_ = bytearray(payload.getvalue())
        sz_ = [len(t_)]
        session.run(bcast_size, feed_dict={sz: sz_})
    else:
        # Non-root ranks feed a placeholder size and receive the real one.
        sz_ = session.run(bcast_size, feed_dict={sz: [0]})
        t_ = np.zeros(sz_, dtype=np.uint8)
    t_ = session.run(bcast_data, feed_dict={t: t_})
    if rank() != root_rank:
        obj = cloudpickle.load(io.BytesIO(t_.tobytes()))
    return obj
def broadcast_object(obj, root_rank=0, session=None, name=None):
    """
    Serializes and broadcasts an object from root rank to all other processes.

    Arguments:
        obj: An object capable of being serialized without losing any context.
        root_rank: The rank of the process from which parameters will be
                   broadcasted to all other processes.
        session: Session for TensorFlow v1 compatibility.
        name: Optional name to use during broadcast, will default to the class type.
    Returns:
        The object that was broadcast from the `root_rank`.
    """
    if name is None:
        name = type(obj).__name__

    def to_numpy(v):
        # Eager tensors convert directly; graph tensors need a session run.
        if _executing_eagerly():
            return v.numpy()
        sess = session or ops.get_default_session()
        return sess.run(v)

    if rank() == root_rank:
        # Root: pickle the object and broadcast its size first so the other
        # ranks can allocate a receive buffer of the right length.
        buf = io.BytesIO()
        cloudpickle.dump(obj, buf)
        t = tf.convert_to_tensor(bytearray(buf.getvalue()), dtype=tf.uint8)
        sz = tf.convert_to_tensor([t.shape[0]], dtype=tf.int32)
        to_numpy(broadcast(sz, root_rank, name + '.sz'))
    else:
        # Non-root: receive the payload size, then allocate a zeroed buffer.
        sz = to_numpy(broadcast(tf.convert_to_tensor([0], dtype=tf.int32),
                                root_rank, name + '.sz'))
        t = tf.zeros(sz.tolist()[0], dtype=tf.uint8)

    t = to_numpy(broadcast(t, root_rank, name + '.t'))
    if rank() != root_rank:
        obj = cloudpickle.load(io.BytesIO(t.tobytes()))
    return obj