import io

import cloudpickle
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
from horovod.tensorflow.mpi_ops import broadcast, rank


def broadcast_object_fn(root_rank=0, session=None, name=None):
    name = name or 'broadcast_object_fn'

    # Graph-mode placeholders: the payload size is broadcast first so that
    # non-root ranks can allocate a receive buffer of the right length.
    sz = tf.placeholder(tf.int32, [1], name='bcast_object_size')
    bcast_size = broadcast(sz, root_rank, name + '.sz')

    t = tf.placeholder(tf.uint8, [None], name='bcast_object_data')
    bcast_data = broadcast(t, root_rank, name + '.t')

    session = session or ops.get_default_session()

    def _bcast(obj):
        if rank() == root_rank:
            # Root serializes the object and broadcasts its byte length.
            b = io.BytesIO()
            cloudpickle.dump(obj, b)
            t_ = bytearray(b.getvalue())
            sz_ = [len(t_)]
            session.run(bcast_size, feed_dict={sz: sz_})
        else:
            # Non-root ranks receive the length and allocate a buffer.
            sz_ = [0]
            sz_ = session.run(bcast_size, feed_dict={sz: sz_})
            t_ = np.zeros(sz_, dtype=np.uint8)
        t_ = session.run(bcast_data, feed_dict={t: t_})

        if rank() != root_rank:
            # Deserialize the received bytes back into a Python object.
            buf = io.BytesIO(t_.tobytes())
            obj = cloudpickle.load(buf)
        return obj
    return _bcast
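# Usage sketch (added for illustration, not from the original source):
# in TF1 graph mode the factory above is called once, so the placeholders
# and broadcast ops are built a single time, and the returned closure can
# then be invoked repeatedly inside the same session. Assumes the
# processes were launched with horovodrun/mpirun.
import horovod.tensorflow as hvd

hvd.init()
with tf.Session() as sess:
    bcast = broadcast_object_fn(root_rank=0, session=sess)
    # Every rank ends up with rank 0's dictionary.
    hparams = bcast({'lr': 1e-3, 'layers': 12} if hvd.rank() == 0 else None)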
def broadcast_global_variables(root_rank):
    from horovod.tensorflow.mpi_ops import broadcast
    ops = []
    for var in tf.global_variables():
        if var.dtype.base_dtype == tf.float16:
            # Upcast float16 variables to float32 for the broadcast and
            # cast back afterwards, for Horovod builds without native
            # fp16 support.
            ops.append(tf.assign(var, tf.cast(
                broadcast(tf.cast(var, tf.float32), root_rank),
                tf.float16)))
        else:
            ops.append(tf.assign(var, broadcast(var, root_rank)))
    return tf.group(*ops)
# In addition to the imports above, this variant relies on:
from horovod.tensorflow.mpi_ops import global_process_set
from horovod.tensorflow.util import _executing_eagerly


def broadcast_object(obj, root_rank=0, session=None, name=None,
                     process_set=global_process_set):
    """
    Serializes and broadcasts an object from root rank to all other processes
    in a process set (defaults to all Horovod processes).

    Arguments:
        obj: An object capable of being serialized without losing any context.
        root_rank: The rank of the process from which parameters will be
                   broadcasted to all other processes.
        session: Session for TensorFlow v1 compatibility.
        name: Optional name to use during broadcast, will default to the
              class type.
        process_set: Process set object to limit this operation to a subset
                     of Horovod processes. Default is the global process set.

    Returns:
        The object that was broadcast from the `root_rank`.
    """
    if name is None:
        name = type(obj).__name__

    def to_numpy(v):
        if not _executing_eagerly():
            sess = session or ops.get_default_session()
            return sess.run(v)
        else:
            return v.numpy()

    if rank() == root_rank:
        # Root serializes the object and broadcasts the payload size first.
        b = io.BytesIO()
        cloudpickle.dump(obj, b)
        t = tf.convert_to_tensor(bytearray(b.getvalue()), dtype=tf.uint8)
        sz = tf.convert_to_tensor([t.shape[0]], dtype=tf.int32)
        to_numpy(broadcast(sz, root_rank, name + '.sz',
                           process_set=process_set))
    else:
        # Non-root ranks receive the size and allocate a matching buffer.
        sz = tf.convert_to_tensor([0], dtype=tf.int32)
        sz = to_numpy(broadcast(sz, root_rank, name + '.sz',
                                process_set=process_set))
        t = tf.zeros(sz.tolist()[0], dtype=tf.uint8)

    t = to_numpy(broadcast(t, root_rank, name + '.t',
                           process_set=process_set))

    if rank() != root_rank:
        buf = io.BytesIO(t.tobytes())
        obj = cloudpickle.load(buf)

    return obj
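# Usage sketch (added for illustration, not from the original source):
# unlike broadcast_object_fn, broadcast_object builds fresh tensors on
# every call and also works under TF2 eager execution. A common pattern
# is synchronizing a resumed epoch counter; `resume_from_checkpoint` is
# a hypothetical helper.
import horovod.tensorflow as hvd

hvd.init()
start_epoch = hvd.broadcast_object(
    resume_from_checkpoint() if hvd.rank() == 0 else 0,
    root_rank=0, name='start_epoch')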
def broadcast_global_variables(root_rank):
    """Broadcasts all global variables from root rank to all other processes.

    Arguments:
        root_rank: rank of the process from which global variables will be
                   broadcasted to all other processes.
    """
    return tf.group(*[tf.assign(var, broadcast(var, root_rank))
                      for var in tf.global_variables()])
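# Usage sketch (added for illustration, not from the original source):
# the typical TF1 pattern runs this broadcast once, right after variable
# initialization, so every worker starts from rank 0's weights instead of
# its own random initialization.
import horovod.tensorflow as hvd
import tensorflow as tf

hvd.init()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(hvd.broadcast_global_variables(0))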
def broadcast_group(variables, root_rank):
    return tf.group(
        *[var.assign(broadcast(var, root_rank)) for var in variables])
def broadcast_group(variables, root_rank):
    # Eager-mode variant: each assign executes immediately, so there is
    # no op to group and return.
    for var in variables:
        var.assign(broadcast(var, root_rank))
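# Usage sketch (added for illustration, not from the original source):
# the two broadcast_group variants above target different execution
# modes. The tf.group version returns a single op for sess.run in graph
# mode, while the loop version runs each assign immediately under eager
# execution. `model` below is a stand-in for any tf.Module/Keras model.
broadcast_group(model.variables, root_rank=0)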
# This hook excerpt assumes project-local helpers (`advanced_get_collection`)
# plus Horovod's `allreduce`/`broadcast` and TF1's `training_util` are in
# scope.
def __init__(self, reallocate_steps=100, is_chief=True):
    """Initializes the hook.

    Args:
        reallocate_steps: A python integer, run the reallocation every N
            steps.
        is_chief: Whether this is the chief process.
    """
    tf.logging.info("Create HvdReallocateHook.")
    self._reallocate_steps = reallocate_steps
    self._is_chief = is_chief

    # Fetch the tensors and variables this hook operates on from the
    # project's 'bas_collection' graph collection.
    self._collection_dict = {}
    name_list = ['label_ids', 'mask', 'word_probs', 'sense_probs',
                 'sense_allocate', 'usage', 'efficiency',
                 'sense_allocate_matrix', 'word_count']
    for item in name_list:
        self._collection_dict[item] = advanced_get_collection(
            'bas_collection', item)

    vocab_size = self._collection_dict['efficiency'].shape[-1]
    bayes_component = self._collection_dict['usage'].shape[-1] // vocab_size

    # Per-step word efficiency: the predicted probability of each target
    # word, masked to valid positions and summed over the batch.
    label_one_hot = tf.reshape(
        tf.one_hot(indices=self._collection_dict['label_ids'],
                   depth=vocab_size),
        [-1, vocab_size])
    mask = tf.reshape(self._collection_dict['mask'], [-1])
    mask_expanded = tf.expand_dims(mask, -1)
    masked_label_one_hot = label_one_hot * mask_expanded
    step_efficiency = tf.reshape(
        self._collection_dict['word_probs'],
        [-1, vocab_size]) * masked_label_one_hot
    self.step_efficiency = tf.reduce_sum(step_efficiency, axis=0)
    self.word_num = tf.cast(
        tf.reduce_sum(masked_label_one_hot, axis=0), tf.int32)

    # Per-step sense usage: map word labels to their allocated senses via
    # the sparse allocation matrix, then accumulate sense probabilities.
    sense_allocate_matrix = self._collection_dict['sense_allocate_matrix']
    sense_probs = self._collection_dict['sense_probs']
    label_onehot_reshape_transpose = tf.transpose(label_one_hot)
    label_sense_multi_hot = tf.transpose(
        tf.sparse.matmul(tf.sparse.transpose(sense_allocate_matrix),
                         label_onehot_reshape_transpose))
    masked_label_sense_multi_hot = mask_expanded * label_sense_multi_hot
    self.step_usage = tf.reduce_sum(
        tf.reshape(sense_probs, [-1, vocab_size * bayes_component])
        * masked_label_sense_multi_hot, axis=0)

    def build_assign_op(input_tensor, accumulate_variable):
        # For each accumulator: an op to add the step statistic, an op to
        # reset it to zero, and an op to allreduce it across workers.
        op_add = tf.assign_add(accumulate_variable, input_tensor)
        op_zero = tf.assign(
            accumulate_variable,
            tf.zeros_like(accumulate_variable,
                          dtype=accumulate_variable.dtype))
        op_allreduce = allreduce(accumulate_variable)
        return op_add, op_zero, op_allreduce

    self.word_num_update, self.word_num_zero, self.word_num_allreduce = \
        build_assign_op(self.word_num, self._collection_dict['word_count'])
    self.efficiency_update, self.efficiency_zero, self.efficiency_allreduce = \
        build_assign_op(self.step_efficiency,
                        self._collection_dict['efficiency'])
    self.usage_update, self.usage_zero, self.usage_allreduce = \
        build_assign_op(self.step_usage, self._collection_dict['usage'])

    # The new allocation is fed in via a placeholder, assigned, and then
    # broadcast from rank 0 so all workers stay consistent.
    self.new_sense_allocate = tf.placeholder(
        shape=[vocab_size * bayes_component], dtype=tf.int64)
    sense_allocate_assign_op = tf.assign(
        self._collection_dict['sense_allocate'], self.new_sense_allocate)
    with tf.control_dependencies([sense_allocate_assign_op]):
        sense_allocate_broadcast_op = broadcast(
            self._collection_dict['sense_allocate'], 0)
    self.sense_allocate_update = tf.group(
        *[sense_allocate_assign_op, sense_allocate_broadcast_op])

    self._fetch_args = {}
    global_step = training_util.get_global_step()
    self._fetch_args["global_step"] = global_step
    self._fetch_args['word_num'] = self.word_num_update
    self._fetch_args['efficiency'] = self.efficiency_update
    self._fetch_args['usage'] = self.usage_update
    self._fetch_args['word_count'] = self._collection_dict['word_count']

    self._allreduce_args = {}
    self._allreduce_args['word_num'] = self.word_num_allreduce
    self._allreduce_args['efficiency'] = self.efficiency_allreduce
    self._allreduce_args['usage'] = self.usage_allreduce
    self._allreduce_args['sense_allocate'] = \
        self._collection_dict['sense_allocate']

    self._zero_args = {}
    self._zero_args['word_num'] = self.word_num_zero
    self._zero_args['efficiency'] = self.efficiency_zero
    self._zero_args['usage'] = self.usage_zero
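# Hypothetical driver (added for illustration, not from the original
# source): a tf.train.SessionRunHook built this way would typically
# accumulate statistics on every step via `self._fetch_args`, and every
# `reallocate_steps` steps allreduce the accumulators, derive a new sense
# allocation (project-specific, elided here), push it through the
# `new_sense_allocate` placeholder, and reset the accumulators.
def before_run(self, run_context):
    # Accumulate per-step statistics alongside the training step.
    return tf.train.SessionRunArgs(self._fetch_args)

def after_run(self, run_context, run_values):
    if run_values.results["global_step"] % self._reallocate_steps == 0:
        stats = run_context.session.run(self._allreduce_args)
        # ... compute new_allocation from `stats` (project-specific) ...
        # run_context.session.run(
        #     self.sense_allocate_update,
        #     feed_dict={self.new_sense_allocate: new_allocation})
        run_context.session.run(self._zero_args)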
def broadcast_group(variables, root_rank, process_set: ProcessSet):
    return tf.group(*[
        var.assign(broadcast(var, root_rank, process_set=process_set))
        for var in variables
    ])
def broadcast_group(variables, root_rank, process_set: ProcessSet):
    # Eager-mode variant of the function above.
    for var in variables:
        var.assign(broadcast(var, root_rank, process_set=process_set))
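# Usage sketch (added for illustration, not from the original source):
# with process sets, the broadcast is restricted to a subset of ranks and
# the remaining ranks are unaffected. Assumes a Horovod version with
# process-set support (0.23+); `model` is again a stand-in.
import horovod.tensorflow as hvd

subset = hvd.ProcessSet([0, 2])
hvd.init(process_sets=[subset])
if hvd.rank() in [0, 2]:
    broadcast_group(model.variables, root_rank=0, process_set=subset)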