def __init__(self, benchmark_cnn, all_reduce_spec, num_workers, num_gpus,
             task_id, allreduce_merge_scope):
  """Initializes the collective all-reduce variable manager.

  Args:
    benchmark_cnn: the BenchmarkCNN that owns this variable manager.
    all_reduce_spec: string spec; must parse to exactly one single-range
      spec whose algorithm is 'collective'.
    num_workers: number of workers participating in the all-reduce.
    num_gpus: number of GPUs per worker.
    task_id: index of this worker's task.
    allreduce_merge_scope: size of the merge scope used when grouping
      gradients for all-reduce.

  Raises:
    ValueError: if all_reduce_spec is empty, is a hybrid (multi-range)
      spec, or does not use the 'collective' algorithm.
  """
  super(VariableMgrCollectiveAllReduce, self).__init__(benchmark_cnn)
  if not all_reduce_spec:
    raise ValueError(
        'collective_all_reduce requires a non-empty all_reduce_spec: %s' %
        all_reduce_spec)
  parsed_spec = allreduce.parse_all_reduce_spec(all_reduce_spec)
  # So far we only support a length-1 all_reduce_spec
  if len(parsed_spec) > 1 or parsed_spec[0].limit > 0:
    raise ValueError(
        'collective_all_reduce requires one single-range all_reduce_spec %s' %
        parsed_spec)
  self._all_reduce_spec = parsed_spec[0]
  if self._all_reduce_spec.alg != 'collective':
    # BUG FIX: the error message previously referenced self.all_reduce_spec,
    # an attribute that does not exist; raising would have produced an
    # AttributeError instead of the intended ValueError.
    raise ValueError(
        'VariableMgrCollectiveAllReduce initialized with non-collective '
        'all_reduce_spec %s' % self._all_reduce_spec)
  self._num_workers = num_workers
  self._num_gpus = num_gpus
  self._task_id = task_id
  self._allreduce_merge_scope = allreduce_merge_scope
  # Starting point for generated collective-op instance keys; each variable
  # gets a distinct key recorded in _instance_key_table.
  self._instance_key_counter = 10000
  self._instance_key_table = dict()
  self._single_session = False
  # List of prefixes for generating PS devices, unused here.
  self._all_reduce_device_prefixes = None
def __init__(self, all_reduce_spec, gpu_indices, agg_small_grads_max_bytes,
             agg_small_grads_max_group):
  """Stores the parsed all-reduce configuration for replicated mode.

  Args:
    all_reduce_spec: string spec; must parse to exactly one range spec
      (hybrid strategies are rejected).
    gpu_indices: indices of the GPUs participating in the all-reduce.
    agg_small_grads_max_bytes: byte threshold for aggregating small grads.
    agg_small_grads_max_group: max group size when aggregating small grads.

  Raises:
    ValueError: if the spec parses to more than one range.
  """
  parsed = allreduce.parse_all_reduce_spec(all_reduce_spec)
  if len(parsed) != 1:
    raise ValueError(
        'Replicated mode does not support hybrid all-reduce strategies'
    )
  self._all_reduce_spec = parsed[0]
  self._gpu_indices = gpu_indices
  self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
  self._agg_small_grads_max_group = agg_small_grads_max_group
def __init__(self, benchmark_cnn, all_reduce_spec, agg_small_grads_max_bytes,
             agg_small_grads_max_group):
  """Initializes the locally-replicated variable manager.

  Args:
    benchmark_cnn: the BenchmarkCNN that owns this variable manager.
    all_reduce_spec: optional string spec; when given it must parse to a
      single range spec, otherwise no all-reduce spec is configured.
    agg_small_grads_max_bytes: byte threshold for aggregating small grads.
    agg_small_grads_max_group: max group size when aggregating small grads.

  Raises:
    ValueError: if a non-empty spec parses to more than one range.
  """
  super(VariableMgrLocalReplicated, self).__init__(benchmark_cnn)
  self._all_reduce_spec = None
  if all_reduce_spec:
    parsed = allreduce.parse_all_reduce_spec(all_reduce_spec)
    # A hybrid (multi-range) spec is not supported in this mode.
    if len(parsed) != 1:
      raise ValueError(
          'replicated mode does not support hybrid all-reduce strategies')
    self._all_reduce_spec = parsed[0]
  self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
  self._agg_small_grads_max_group = agg_small_grads_max_group
def __init__(self, benchmark_cnn, all_reduce_spec, job_name, num_workers,
             agg_small_grads_max_bytes, agg_small_grads_max_group):
  """Initializes the distributed all-reduce variable manager.

  Args:
    benchmark_cnn: the BenchmarkCNN that owns this variable manager.
    all_reduce_spec: non-empty string spec parsed into one or more range
      specs used for distributed all-reduce.
    job_name: name of this job, used to build all-reduce device prefixes.
    num_workers: number of workers participating in the all-reduce.
    agg_small_grads_max_bytes: byte threshold for aggregating small grads.
    agg_small_grads_max_group: max group size when aggregating small grads.

  Raises:
    ValueError: if all_reduce_spec is empty or parses to an empty spec list.
  """
  super(VariableMgrDistributedAllReduce, self).__init__(benchmark_cnn)
  if not all_reduce_spec:
    raise ValueError(
        'distributed_all_reduce requires a non-empty all_reduce_spec')
  self._all_reduce_spec = allreduce.parse_all_reduce_spec(all_reduce_spec)
  # Fail fast: validate the parsed spec before doing any further setup.
  # (Previously this check ran last, after device prefixes were built.)
  if not self._all_reduce_spec:
    raise ValueError('all_reduce_spec must be specified')
  self._all_reduce_device_prefixes = (
      allreduce.build_all_reduce_device_prefixes(job_name, num_workers))
  self._num_workers = num_workers
  self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
  self._agg_small_grads_max_group = agg_small_grads_max_group