Example #1
 def __init__(self, benchmark_cnn, all_reduce_spec, num_workers, num_gpus,
              task_id, allreduce_merge_scope):
     super(VariableMgrCollectiveAllReduce, self).__init__(benchmark_cnn)
     if not all_reduce_spec:
         raise ValueError(
             'collective_all_reduce requires a non-empty all_reduce_spec: %s'
             % all_reduce_spec)
     parsed_spec = allreduce.parse_all_reduce_spec(all_reduce_spec)
     # So far we only support a length-1 all_reduce_spec
     if len(parsed_spec) > 1 or parsed_spec[0].limit > 0:
         raise ValueError(
             'collective_all_reduce requires one single-range all_reduce_spec %s'
             % parsed_spec)
     self._all_reduce_spec = parsed_spec[0]
     if self._all_reduce_spec.alg != 'collective':
         raise ValueError(
             'VariableMgrCollectiveAllReduce initialized with non-collective '
             'all_reduce_spec %s' % self._all_reduce_spec)
     self._num_workers = num_workers
     self._num_gpus = num_gpus
     self._task_id = task_id
     self._allreduce_merge_scope = allreduce_merge_scope
     self._instance_key_counter = 10000
     self._instance_key_table = dict()
     self._single_session = False  # each worker drives its own session
     # List of prefixes for generating PS devices, unused here.
     self._all_reduce_device_prefixes = None
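
TensorFlow collective ops match the participating tensors across workers by a shared instance key, which is why Example #1's constructor seeds an _instance_key_counter and an _instance_key_table. A minimal self-contained sketch of how such keys can be handed out deterministically (the class and method names below are illustrative, not the benchmark's actual API):

 class InstanceKeyAllocator:
     # Illustrative sketch: every worker must derive the same instance
     # key for the same variable, so keys come from a deterministic
     # counter keyed by variable name.
     def __init__(self, start=10000):
         self._counter = start
         self._table = dict()

     def key_for(self, name):
         if name not in self._table:
             self._table[name] = self._counter
             self._counter += 1
         return self._table[name]

 alloc = InstanceKeyAllocator()
 assert alloc.key_for('conv1/weights') == 10000
 assert alloc.key_for('conv1/weights') == 10000  # stable across lookups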
Example #2
 def __init__(self, all_reduce_spec, gpu_indices, agg_small_grads_max_bytes,
              agg_small_grads_max_group):
   spec = allreduce.parse_all_reduce_spec(all_reduce_spec)
   if len(spec) != 1:
     raise ValueError(
         'Replicated mode does not support hybrid all-reduce strategies')
   self._all_reduce_spec = spec[0]
   self._gpu_indices = gpu_indices
   self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
   self._agg_small_grads_max_group = agg_small_grads_max_group
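
The agg_small_grads_max_bytes and agg_small_grads_max_group arguments bound how small gradients are packed together before the all-reduce, amortizing per-tensor overhead. A rough, self-contained sketch of the grouping policy they imply (an illustration of the idea, not the benchmark's actual packing code):

 def group_small_tensors(sizes_bytes, max_bytes, max_group):
     # Tensors at or below max_bytes are packed into groups of at most
     # max_group members; larger tensors are reduced individually.
     groups, current = [], []
     for i, size in enumerate(sizes_bytes):
         if size > max_bytes:
             continue
         current.append(i)
         if len(current) == max_group:
             groups.append(current)
             current = []
     if current:
         groups.append(current)
     return groups

 print(group_small_tensors([256, 512, 4096, 128], max_bytes=1024, max_group=2))
 # -> [[0, 1], [3]]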
Example #3
 def __init__(self, benchmark_cnn, all_reduce_spec, job_name, num_workers,
              agg_small_grads_max_bytes, agg_small_grads_max_group,
              allreduce_merge_scope):
     super(VariableMgrDistributedAllReduce, self).__init__(benchmark_cnn)
     if not all_reduce_spec:
         raise ValueError(
             'distributed_all_reduce requires a non-empty all_reduce_spec')
     self._all_reduce_spec = allreduce.parse_all_reduce_spec(
         all_reduce_spec)
     self._all_reduce_device_prefixes = (
         allreduce.build_all_reduce_device_prefixes(job_name, num_workers))
     self._num_workers = num_workers
     self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
     self._agg_small_grads_max_group = agg_small_grads_max_group
     self._allreduce_merge_scope = allreduce_merge_scope
     # Re-check after parsing: an all_reduce_spec that parses to an
     # empty list is as invalid as a missing one.
     if not self._all_reduce_spec:
         raise ValueError('all_reduce_spec must be specified')
     self._single_session = True  # one master session drives all workers
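
Example #3 precomputes one device prefix per worker so reduction shards can be pinned across tasks. A hypothetical stand-in showing the shape of that output (the real allreduce.build_all_reduce_device_prefixes may differ in detail):

 def build_device_prefixes(job_name, num_workers):
     # Hypothetical illustration: one TensorFlow device prefix per
     # worker task in the given job.
     return ['/job:%s/task:%d' % (job_name, i) for i in range(num_workers)]

 print(build_device_prefixes('worker', 2))
 # -> ['/job:worker/task:0', '/job:worker/task:1']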
Example #4
 def __init__(self, benchmark_cnn, all_reduce_spec,
              agg_small_grads_max_bytes, agg_small_grads_max_group,
              allreduce_merge_scope):
     super(VariableMgrLocalReplicated, self).__init__(benchmark_cnn)
     if all_reduce_spec:
         spec = allreduce.parse_all_reduce_spec(all_reduce_spec)
         if len(spec) != 1:
             raise ValueError(
                 'replicated mode does not support hybrid all-reduce strategies'
             )
         self._all_reduce_spec = spec[0]
     else:
         self._all_reduce_spec = None
     self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
     self._agg_small_grads_max_group = agg_small_grads_max_group
     self._warmup_ops = []  # ops to run during warm-up, before timed steps
     self._allreduce_merge_scope = allreduce_merge_scope
     self._gradient_put_ops = None
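
Example #4 treats the spec as optional: an empty value means falling back to the default local gradient aggregation, while a non-empty value must parse to exactly one algorithm. A self-contained sketch of that branching with an illustrative stand-in parser (the real parser lives in allreduce.py and returns richer entries):

 import collections

 ParsedSpec = collections.namedtuple('ParsedSpec', 'alg shards limit')  # illustrative

 def resolve_replicated_spec(all_reduce_spec, parse_fn):
     if not all_reduce_spec:
         return None  # fall back to the default aggregation path
     spec = parse_fn(all_reduce_spec)
     if len(spec) != 1:
         raise ValueError(
             'replicated mode does not support hybrid all-reduce strategies')
     return spec[0]

 # Stand-in parser for demonstration only.
 fake_parse = lambda s: [ParsedSpec(alg=s, shards=1, limit=0)]
 print(resolve_replicated_spec('nccl', fake_parse))  # ParsedSpec(alg='nccl', ...)
 print(resolve_replicated_spec('', fake_parse))      # None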