Example #1
def broadcast_object_fn(root_rank=0, session=None, name=None):
    """Returns a callable that broadcasts a picklable object from `root_rank`
    to every other process, building the placeholder-backed broadcast ops only
    once so the callable can be reused without growing the graph."""
    name = name or 'broadcast_object_fn'

    sz = tf.placeholder(tf.int32, [1], name='bcast_object_size')
    bcast_size = broadcast(sz, root_rank, name + '.sz')

    t = tf.placeholder(tf.uint8, [None], name='bcast_object_data')
    bcast_data = broadcast(t, root_rank, name + '.t')

    session = session or ops.get_default_session()

    def _bcast(obj):
        if rank() == root_rank:
            # Root: pickle the object and broadcast the payload size first.
            b = io.BytesIO()
            cloudpickle.dump(obj, b)
            t_ = bytearray(b.getvalue())
            sz_ = [len(t_)]
            session.run(bcast_size, feed_dict={sz: sz_})
        else:
            # Non-root: receive the size and allocate a matching buffer.
            sz_ = [0]
            sz_ = session.run(bcast_size, feed_dict={sz: sz_})
            t_ = np.zeros(sz_, dtype=np.uint8)

        # Broadcast the serialized payload itself.
        t_ = session.run(bcast_data, feed_dict={t: t_})

        if rank() != root_rank:
            # Non-root: unpickle the received bytes.
            buf = io.BytesIO(t_.tobytes())
            obj = cloudpickle.load(buf)

        return obj

    return _bcast
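A minimal usage sketch (an assumption, not from the source): build the broadcaster once per graph, then call it on every rank; only rank 0's argument matters.

bcast = broadcast_object_fn(root_rank=0)
# Non-root ranks may pass None; all ranks return rank 0's object.
state = bcast({'epoch': 3} if rank() == 0 else None)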
Example #2
File: hooks.py  Project: vsl9/OpenSeq2Seq
def broadcast_global_variables(root_rank):
  from horovod.tensorflow.mpi_ops import broadcast
  ops = []
  for var in tf.global_variables():
    if var.dtype.base_dtype == tf.float16:
      # Round-trip float16 variables through float32, since the underlying
      # broadcast op may not support float16 tensors directly.
      ops.append(tf.assign(var, tf.cast(broadcast(tf.cast(var, tf.float32),
                                                  root_rank), tf.float16)))
    else:
      ops.append(tf.assign(var, broadcast(var, root_rank)))
  return tf.group(*ops)
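A hedged sketch of driving such a function in a TF1 program (the session setup below is an assumption, not from OpenSeq2Seq):

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
bcast_op = broadcast_global_variables(root_rank=0)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Overwrite each worker's random initialization with rank 0's values so
    # every replica starts from identical weights.
    sess.run(bcast_op)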
Example #3
File: functions.py  Project: raajay/horovod
def broadcast_object(obj,
                     root_rank=0,
                     session=None,
                     name=None,
                     process_set=global_process_set):
    """
    Serializes and broadcasts an object from root rank to all other processes
    in a process set (defaults to all Horovod processes).

    Arguments:
        obj: An object capable of being serialized without losing any context.
        root_rank: The rank of the process from which parameters will be
                   broadcasted to all other processes.
        session: Session for TensorFlow v1 compatibility.
        name: Optional name to use during broadcast, will default to the class
              type.
        process_set: Process set object to limit this operation to a subset of
                     Horovod processes. Default is the global process set.
    Returns:
        The object that was broadcast from the `root_rank`.
    """
    if name is None:
        name = type(obj).__name__

    def to_numpy(v):
        if not _executing_eagerly():
            sess = session or ops.get_default_session()
            return sess.run(v)
        else:
            return v.numpy()

    if rank() == root_rank:
        b = io.BytesIO()
        cloudpickle.dump(obj, b)
        t = tf.convert_to_tensor(bytearray(b.getvalue()), dtype=tf.uint8)
        sz = tf.convert_to_tensor([t.shape[0]], dtype=tf.int32)
        to_numpy(
            broadcast(sz, root_rank, name + '.sz', process_set=process_set))
    else:
        sz = tf.convert_to_tensor([0], dtype=tf.int32)
        sz = to_numpy(
            broadcast(sz, root_rank, name + '.sz', process_set=process_set))
        t = tf.zeros(sz.tolist()[0], dtype=tf.uint8)

    t = to_numpy(broadcast(t, root_rank, name + '.t', process_set=process_set))

    if rank() != root_rank:
        buf = io.BytesIO(t.tobytes())
        obj = cloudpickle.load(buf)

    return obj
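Under eager TF2 the same function broadcasts arbitrary picklable Python state in one call; a short sketch (the dictionary contents are illustrative):

import horovod.tensorflow as hvd

hvd.init()
state = {'epoch': 0, 'best_loss': float('inf')} if hvd.rank() == 0 else None
# Every rank returns rank 0's dictionary.
state = hvd.broadcast_object(state, root_rank=0)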
Example #4
def broadcast_global_variables(root_rank):
    """Broadcasts all global variables from root rank to all other processes.

    Arguments:
        root_rank: Rank of the process from which global variables will be
                   broadcast to all other processes.
    """
    return tf.group(*[tf.assign(var, broadcast(var, root_rank))
                      for var in tf.global_variables()])
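In practice this group op is usually installed through Horovod's equivalent session hook rather than run by hand; a minimal sketch, assuming TF1 and an initialized Horovod:

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
# The hook performs the same broadcast once, right after the session is created.
hooks = [hvd.BroadcastGlobalVariablesHook(root_rank=0)]
with tf.train.MonitoredTrainingSession(hooks=hooks) as sess:
    pass  # training loop goes here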
Example #5
def broadcast_group(variables, root_rank):
    return tf.group(
        *[var.assign(broadcast(var, root_rank)) for var in variables])
Example #6
def broadcast_group(variables, root_rank):
    for var in variables:
        var.assign(broadcast(var, root_rank))
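The grouped variant above it returns a single op for graph-mode execution, while this loop variant performs each assignment eagerly. A hedged eager-mode sketch (the tiny Keras model is illustrative):

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
model = tf.keras.Sequential([tf.keras.layers.Dense(4)])
model.build(input_shape=(None, 8))
# In eager mode each var.assign(...) executes immediately; no session needed.
broadcast_group(model.variables, root_rank=0)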
Example #7
    def __init__(self,
                 reallocate_steps=100,
                 is_chief=True):
        """ Initializes the hook.

        Args:
            checkpoint_dir: A string, base directory for the checkpoint files.
            display_steps: A python integer, display every N steps.
            maximum_train_steps: A python integer, the maximum training steps.
            do_summary: Whether to save summaries when display.
            is_chief: Whether this is the chief process.do_summary:
        """

        tf.logging.info("Create HvdReallocateHook.")

        self._reallocate_steps = reallocate_steps
        self._is_chief = is_chief

        # Gather the tensors this hook needs from the 'bas_collection' collection.
        self._collection_dict = {}
        name_list = ['label_ids', 'mask', 'word_probs', 'sense_probs',
                     'sense_allocate', 'usage', 'efficiency',
                     'sense_allocate_matrix', 'word_count']
        for item in name_list:
            self._collection_dict[item] = advanced_get_collection('bas_collection', item)
        vocab_size = self._collection_dict['efficiency'].shape[-1]

        bayes_component = self._collection_dict['usage'].shape[-1] // vocab_size
        # One-hot encode the labels and zero out masked (padding) positions.
        label_one_hot = tf.reshape(
            tf.one_hot(indices=self._collection_dict['label_ids'], depth=vocab_size),
            [-1, vocab_size])
        mask = tf.reshape(self._collection_dict['mask'], [-1])
        mask_expanded = tf.expand_dims(mask, -1)
        masked_label_one_hot = label_one_hot * mask_expanded
        # Probability mass assigned to the true label at each position.
        step_efficiency = tf.reshape(
            self._collection_dict['word_probs'], [-1, vocab_size]) * masked_label_one_hot
        self.step_efficiency = tf.reduce_sum(step_efficiency, axis=0)
        self.word_num = tf.cast(tf.reduce_sum(masked_label_one_hot, axis=0), tf.int32)
        sense_allocate_matrix = self._collection_dict['sense_allocate_matrix']
        sense_probs = self._collection_dict['sense_probs']
        label_onehot_reshape_transpose = tf.transpose(label_one_hot)
        # Map each one-hot word row to a multi-hot row over that word's senses.
        label_sense_multi_hot = tf.transpose(
            tf.sparse.matmul(tf.sparse.transpose(sense_allocate_matrix),
                             label_onehot_reshape_transpose))
        masked_label_sense_multi_hot = mask_expanded * label_sense_multi_hot
        self.step_usage = tf.reduce_sum(
            tf.reshape(sense_probs, [-1, vocab_size * bayes_component]) *
            masked_label_sense_multi_hot, axis=0)
        def build_assign_op(input_tensor, accumulate_variable):
            # For each accumulator, build: an op adding this step's statistics,
            # an op resetting it to zero, and an op allreducing it across workers.
            op_add = tf.assign_add(accumulate_variable, input_tensor)
            op_zero = tf.assign(accumulate_variable,
                                tf.zeros_like(accumulate_variable,
                                              dtype=accumulate_variable.dtype))
            op_allreduce = allreduce(accumulate_variable)
            return op_add, op_zero, op_allreduce

        self.word_num_update, self.word_num_zero, self.word_num_allreduce = \
            build_assign_op(self.word_num, self._collection_dict['word_count'])
        self.efficiency_update, self.efficiency_zero, self.efficiency_allreduce = \
            build_assign_op(self.step_efficiency, self._collection_dict['efficiency'])
        self.usage_update, self.usage_zero, self.usage_allreduce = \
            build_assign_op(self.step_usage, self._collection_dict['usage'])
        self.new_sense_allocate = tf.placeholder(
            shape=[vocab_size * bayes_component], dtype=tf.int64)
        sense_allocate_assign_op = tf.assign(
            self._collection_dict['sense_allocate'], self.new_sense_allocate)
        # Assign the new allocation first, then broadcast rank 0's copy so all
        # workers agree on it.
        with tf.control_dependencies([sense_allocate_assign_op]):
            sense_allocate_broadcast_op = broadcast(
                self._collection_dict['sense_allocate'], 0)
        self.sense_allocate_update = tf.group(*[sense_allocate_assign_op,
                                                sense_allocate_broadcast_op])
        
        self._fetch_args = {}
        global_step = training_util.get_global_step()
        self._fetch_args["global_step"] = global_step
        self._fetch_args['word_num'] = self.word_num_update
        self._fetch_args['efficiency'] = self.efficiency_update
        self._fetch_args['usage'] = self.usage_update
        self._fetch_args['word_count'] = self._collection_dict['word_count']

        self._allreduce_args = {}
        self._allreduce_args['word_num'] = self.word_num_allreduce
        self._allreduce_args['efficiency'] = self.efficiency_allreduce
        self._allreduce_args['usage'] = self.usage_allreduce
        self._allreduce_args['sense_allocate'] = self._collection_dict['sense_allocate']

        self._zero_args = {}
        self._zero_args['word_num'] = self.word_num_zero
        self._zero_args['efficiency'] = self.efficiency_zero
        self._zero_args['usage'] = self.usage_zero
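Only the constructor appears above; how the hook consumes these ops is not shown. A speculative sketch of per-step logic under that caveat (the method name and `compute_allocation` are hypothetical stand-ins, not code from the source):

def _maybe_reallocate(self, session, global_step):  # hypothetical hook method
    if global_step % self._reallocate_steps != 0:
        return
    # Sum the accumulated per-word statistics across all workers.
    stats = session.run(self._allreduce_args)
    new_allocation = compute_allocation(stats)  # hypothetical helper
    # Every rank runs the update; the broadcast inside keeps rank 0's
    # allocation authoritative.
    session.run(self.sense_allocate_update,
                feed_dict={self.new_sense_allocate: new_allocation})
    # Reset the accumulators for the next window.
    session.run(self._zero_args)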
Example #8
File: functions.py  Project: raajay/horovod
def broadcast_group(variables, root_rank, process_set: ProcessSet):
    return tf.group(*[
        var.assign(broadcast(var, root_rank, process_set=process_set))
        for var in variables
    ])
Example #9
File: functions.py  Project: raajay/horovod
def broadcast_group(variables, root_rank, process_set: ProcessSet):
    for var in variables:
        var.assign(broadcast(var, root_rank, process_set=process_set))
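A hedged sketch of restricting the broadcast to a subset of ranks (requires a Horovod version with process-set support; the even-rank set and tiny model are illustrative):

import tensorflow as tf
import horovod.tensorflow as hvd

even_set = hvd.ProcessSet([0, 2])
hvd.init(process_sets=[even_set])
model = tf.keras.Sequential([tf.keras.layers.Dense(2)])
model.build(input_shape=(None, 4))
if hvd.rank() in (0, 2):  # only members of the set may participate
    broadcast_group(model.variables, 0, process_set=even_set)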