Example #1
0
 def __init__(self, benchmark_cnn, all_reduce_spec,
              num_workers, num_gpus, task_id, allreduce_merge_scope):
   """Initialize a collective-all-reduce variable manager.

   Args:
     benchmark_cnn: the owning benchmark object, forwarded to the base class.
     all_reduce_spec: non-empty spec string; must parse to exactly one
       single-range entry whose algorithm is 'collective'.
     num_workers: number of workers participating in the all-reduce.
     num_gpus: number of GPUs per worker.
     task_id: this worker's task index.
     allreduce_merge_scope: merge-scope size used when grouping gradients.

   Raises:
     ValueError: if the spec is empty, parses to more than one range,
       has a nonzero size limit, or names a non-collective algorithm.
   """
   super(VariableMgrCollectiveAllReduce, self).__init__(benchmark_cnn)
   if not all_reduce_spec:
     raise ValueError(
         'collective_all_reduce requires a non-empty all_reduce_spec: %s'
         % all_reduce_spec)
   parsed_spec = allreduce.parse_all_reduce_spec(all_reduce_spec)
   # So far we only support a length-1 all_reduce_spec
   if len(parsed_spec) > 1 or parsed_spec[0].limit > 0:
     raise ValueError(
         'collective_all_reduce requires one single-range all_reduce_spec %s'
         % parsed_spec)
   self._all_reduce_spec = parsed_spec[0]
   if self._all_reduce_spec.alg != 'collective':
     # BUG FIX: this previously formatted `self.all_reduce_spec` (no
     # underscore), which does not exist and raised AttributeError instead
     # of the intended ValueError.
     raise ValueError(
         'VariableMgrCollectiveAllReduce initialized with non-collective '
         'all_reduce_spec %s' % self._all_reduce_spec)
   self._num_workers = num_workers
   self._num_gpus = num_gpus
   self._task_id = task_id
   self._allreduce_merge_scope = allreduce_merge_scope
   # Instance keys identify collective ops across workers; start high to
   # avoid colliding with keys assigned elsewhere.
   self._instance_key_counter = 10000
   self._instance_key_table = dict()
   self._single_session = False
   # List of prefixes for generating PS devices, unused here.
   self._all_reduce_device_prefixes = None
Example #2
0
 def __init__(self, all_reduce_spec, gpu_indices, agg_small_grads_max_bytes,
              agg_small_grads_max_group):
     """Set up replicated-mode all-reduce configuration.

     Args:
         all_reduce_spec: spec string; must parse to exactly one entry,
             since hybrid (multi-range) strategies are unsupported here.
         gpu_indices: indices of the GPUs participating in the reduction.
         agg_small_grads_max_bytes: byte threshold for aggregating small
             gradients.
         agg_small_grads_max_group: maximum group size when aggregating
             small gradients.

     Raises:
         ValueError: if the spec parses to more than one entry.
     """
     parsed = allreduce.parse_all_reduce_spec(all_reduce_spec)
     # Exactly one range is supported; anything else is a hybrid strategy.
     if len(parsed) != 1:
         raise ValueError(
             'Replicated mode does not support hybrid all-reduce strategies'
         )
     self._all_reduce_spec = parsed[0]
     self._gpu_indices = gpu_indices
     self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
     self._agg_small_grads_max_group = agg_small_grads_max_group
Example #3
0
 def __init__(self, benchmark_cnn, all_reduce_spec, agg_small_grads_max_bytes,
              agg_small_grads_max_group):
   """Initialize the local replicated variable manager.

   Args:
     benchmark_cnn: the owning benchmark object, forwarded to the base class.
     all_reduce_spec: optional spec string. When given, it must parse to
       exactly one entry; when falsy, no all-reduce spec is configured.
     agg_small_grads_max_bytes: byte threshold for aggregating small grads.
     agg_small_grads_max_group: max group size when aggregating small grads.

   Raises:
     ValueError: if a non-empty spec parses to more than one entry.
   """
   super(VariableMgrLocalReplicated, self).__init__(benchmark_cnn)
   # Default to no spec; overwrite below when one is supplied.
   self._all_reduce_spec = None
   if all_reduce_spec:
     parsed = allreduce.parse_all_reduce_spec(all_reduce_spec)
     if len(parsed) != 1:
       raise ValueError(
           'replicated mode does not support hybrid all-reduce strategies')
     self._all_reduce_spec = parsed[0]
   self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
   self._agg_small_grads_max_group = agg_small_grads_max_group
Example #4
0
 def __init__(self, benchmark_cnn, all_reduce_spec, job_name, num_workers,
              agg_small_grads_max_bytes, agg_small_grads_max_group):
   """Initialize the distributed all-reduce variable manager.

   Args:
     benchmark_cnn: the owning benchmark object, forwarded to the base class.
     all_reduce_spec: non-empty spec string for the distributed all-reduce.
     job_name: job name used to build per-worker device prefixes.
     num_workers: number of workers participating in the all-reduce.
     agg_small_grads_max_bytes: byte threshold for aggregating small grads.
     agg_small_grads_max_group: max group size when aggregating small grads.

   Raises:
     ValueError: if the spec string is empty or parses to an empty result.
   """
   super(VariableMgrDistributedAllReduce, self).__init__(benchmark_cnn)
   if not all_reduce_spec:
     raise ValueError(
         'distributed_all_reduce requires a non-empty all_reduce_spec')
   self._all_reduce_spec = allreduce.parse_all_reduce_spec(all_reduce_spec)
   # Validate the parsed spec immediately, before it is used to derive the
   # device prefixes below. (Previously this check ran last, after the
   # spec had already been consumed.)
   if not self._all_reduce_spec:
     raise ValueError('all_reduce_spec must be specified')
   self._all_reduce_device_prefixes = (
       allreduce.build_all_reduce_device_prefixes(job_name, num_workers))
   self._num_workers = num_workers
   self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
   self._agg_small_grads_max_group = agg_small_grads_max_group