def _keras_callback_on_batch_end(callback, batch, logs=None):
    """Broadcast should be done after the first gradient step to ensure optimizer initialization."""
    if callback.broadcast_done:
        return
    if _tf_major_version == 2:
        if hasattr(callback.model, 'variables'):
            for v in callback.model.variables:
                _tf_assign(v, broadcast(v))
            opt_variables = None
            if hasattr(callback.model.optimizer, 'variables'):
                opt_variables = callback.model.optimizer.variables()
            else:
                opt_variables = callback.model.optimizer.optimizer.variables()
            # print(opt_variables)
            for v in opt_variables:
                _tf_assign(v, broadcast(v))
        else:
            raise RuntimeError('No variables() in %s' % callback.model)
    if _tf_major_version == 1:
        tf.keras.backend.get_session().run(BroadcastGlobalVariablesOp())
    callback.broadcast_done = True
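# A minimal usage sketch for the callback above, assuming it is exposed as
# kungfu.tensorflow.initializer.BroadcastGlobalVariablesCallback and that
# `model` and `dataset` are a compiled Keras model and a tf.data pipeline:
from kungfu.tensorflow.initializer import BroadcastGlobalVariablesCallback

model.fit(dataset, epochs=1, callbacks=[BroadcastGlobalVariablesCallback()])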
def BroadcastGlobalVariablesOp():
    """A TensorFlow operator that broadcasts global variables.

    This operator is often used with the low-level tf.Session.
    """
    ops = [tf.assign(v, broadcast(v)) for v in tf.global_variables()]
    return tf.group(ops)
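# A minimal sketch of the tf.Session usage mentioned in the docstring
# (TF 1.x graph mode; `loss` and `n_steps` are hypothetical placeholders):
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
bcast_op = BroadcastGlobalVariablesOp()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(bcast_op)  # every peer starts from rank 0's initial weights
    for _ in range(n_steps):
        sess.run(train_op)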
def broadcast_variables(variables):
    """A TensorFlow function that broadcasts global variables.

    This function is often used with ``tf.GradientTape`` or embedded as part
    of a training program.
    """
    for v in variables:
        _tf_assign(v, broadcast(v))
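# A sketch of the tf.GradientTape usage mentioned in the docstring
# (TF 2.x eager mode; `model`, `optimizer`, `loss_fn` and `dataset` are
# hypothetical). Broadcasting after the first step ensures the optimizer's
# slot variables exist before they are synchronized:
for step, (images, labels) in enumerate(dataset):
    with tf.GradientTape() as tape:
        loss = loss_fn(labels, model(images))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    if step == 0:
        broadcast_variables(model.variables)
        broadcast_variables(optimizer.variables())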
def distributed_initializer(self):
    # broadcast every model variable from rank 0, then save the model and
    # block on a barrier so all peers leave initialization together
    bcast_ops = []
    for v in self.variables():
        bcast_ops.append(tf.assign(v, broadcast(v)))
    with tf.control_dependencies(bcast_ops):
        with tf.control_dependencies([self._save_model_op]):
            return barrier()
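# A minimal usage sketch, assuming `opt` is an optimizer object exposing the
# distributed_initializer() above (TF 1.x session mode; `loss` is hypothetical):
train_op = opt.minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(opt.distributed_initializer())  # broadcast, save, then barrier
    sess.run(train_op)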
def test_broadcast():
    from kungfu.tensorflow.ops import broadcast
    v = tf.Variable(True if current_rank() == 0 else False)
    u = broadcast(v)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        x = sess.run(v)
        y = sess.run(u)
        # print(x, y)
        assert y == True
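# Such a test only makes sense with multiple peers; KungFu programs are
# launched with the kungfu-run tool, e.g. (4 local peers, script name assumed):
#
#   kungfu-run -np 4 python3 test_broadcast.py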
def build_ops():
    optimizer = build_optimizer()
    x = tf.Variable(1.0, dtype=tf.float32)
    y = x * x
    train_step = optimizer.minimize(y)
    sync_op = tf.assign(x, broadcast(x))
    init_op = tf.global_variables_initializer()
    return init_op, sync_op, train_step, y
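# A minimal driver sketch for the ops built above (assumes a KungFu
# environment; the step count is arbitrary):
init_op, sync_op, train_step, y = build_ops()
with tf.Session() as sess:
    sess.run(init_op)
    sess.run(sync_op)  # align x across peers before training starts
    for _ in range(10):
        sess.run(train_step)
    print(sess.run(y))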
def test_set_tree(steps, warmup_steps=10):
    from kungfu.python import current_cluster_size
    from kungfu.tensorflow.ops import all_reduce, broadcast
    from kungfu.tensorflow.ops.adapt import set_tree
    n = current_cluster_size()
    tree_place = tf.placeholder(dtype=tf.int32, shape=(n, ))
    set_tree_op = set_tree(broadcast(tree_place))
    magic = 32
    x = tf.Variable(list(range(magic)), dtype=tf.int32)
    y = all_reduce(x)
    init = tf.global_variables_initializer()
    durations = []
    with tf.Session() as sess:
        sess.run(init)
        from kungfu._utils import one_based_range
        for step in one_based_range(steps + warmup_steps):
            v = sess.run(y)
            # sum(range(magic)) == magic * (magic - 1) / 2, all-reduced over n peers
            assert (v.sum() == n * magic * (magic - 1) / 2)
            # print(v)
            tree = gen_tree(n)
            t0 = time.time()
            sess.run(set_tree_op, feed_dict={tree_place: tree})
            dur = time.time() - t0
            if step > warmup_steps:
                durations.append(dur)
    ds = np.array([d * 1000 for d in durations])
    from kungfu._utils import show_duration
    print('test set_tree OK for %d times among %d peers, took ~ %f <- [%f, %f] (ms)' %
          (len(ds), n, ds.mean(), ds.min(), ds.max()))
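# `gen_tree` is not defined in this snippet. A stand-in that returns a random
# star topology is sketched below; the encoding (a length-n parent vector in
# which the root is its own parent) is only an assumption, not necessarily
# KungFu's actual tree format:
import random

def gen_tree(n):
    root = random.randrange(n)
    return [i if i == root else root for i in range(n)]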
config, max_step = get_config()

def build_ops():
    step_place = tf.placeholder(dtype=tf.int32, shape=())
    new_step_op = step_based_schedule(config, step_place)
    resize_op = resize_cluster_from_url()
    return step_place, resize_op, new_step_op

step_place, resize_op, new_step_op = build_ops()
sync_step_op = all_reduce(step_place, op='max')
x = tf.Variable(1, dtype=tf.int32)
y = all_reduce(x)
sync_state_op = tf.assign(x, broadcast(x))
init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)
    need_sync = True
    i = 0
    while i < max_step:
        if need_sync:
            # recover the global step from the peers that kept running
            new_step = sess.run(sync_step_op, feed_dict={step_place: i})
            print('sync step: %d -> %d' % (i, new_step))
            i = new_step
            sess.run(sync_state_op)
        print(i)
        v = sess.run(y)
        # sketch: assuming resize_op yields a bool that indicates whether the
        # cluster changed; a changed cluster forces a re-sync on the next step
        need_sync = sess.run(resize_op)
        i += 1
def build_ops():
    init_step = int(_get_init_step())
    print('init_step is %d' % init_step)
    step = counter(init_step)
    schedule = step_based_schedule(config, step)
    ckpt_tensor = tf.as_string(step + 1)
    resize_op = resize_cluster(ckpt_tensor, schedule)
    return init_step, resize_op

init_step, step_op = build_ops()
x = tf.Variable(1, dtype=tf.int32)
y = all_reduce(x)
sync_op = tf.assign(x, broadcast(x))
init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)
    need_sync = True
    for i in range(init_step, max_step):
        if need_sync:
            sess.run(sync_op)
        print(i)
        v = sess.run(y)
        print('step %d, np=%d' % (i, v))
        # must be called exactly once per step
        need_sync, keep = sess.run(step_op)
        # sketch: `keep` indicates whether this peer remains in the new
        # cluster; a removed peer should stop training
        if not keep:
            break
def begin(self):
    # build one broadcast op per global variable when the hook is registered
    from kungfu.tensorflow.ops import broadcast
    self._ops = [tf.assign(v, broadcast(v)) for v in tf.global_variables()]
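# A sketch of how such a session hook is typically completed (TF 1.x
# tf.train.SessionRunHook; the class name and after_create_session body are
# assumptions, not necessarily KungFu's implementation):
class BroadcastGlobalVariablesHook(tf.train.SessionRunHook):
    def begin(self):
        from kungfu.tensorflow.ops import broadcast
        self._ops = [tf.assign(v, broadcast(v)) for v in tf.global_variables()]

    def after_create_session(self, session, coord):
        # run the broadcast exactly once, right after session creation
        for op in self._ops:
            session.run(op)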