def __init__(self, dataset, sink_size, epoch_num, iter_first_order):
    """Initialize the sink iterator and compute the number of sink passes.

    Args:
        dataset: Source dataset; may expose a ``__loop_size__`` attribute
            when loop-sink is configured.
        sink_size: Number of steps sunk to device per pass.
        epoch_num: Total number of epochs to run.
        iter_first_order: Extra first-order iterations added to the loop size.

    Raises:
        ValueError: If the dataset size is not an exact multiple of the
            effective loop size.
    """
    super().__init__(dataset, sink_size, epoch_num)
    sink_count = 1
    if hasattr(dataset, '__loop_size__'):
        loop_size = dataset.__loop_size__ + iter_first_order
        # Hoist the dataset-size query: the original called
        # dataset.get_dataset_size() three times in this branch.
        dataset_size = dataset.get_dataset_size()
        if loop_size <= dataset_size and dataset_size % loop_size != 0:
            raise ValueError(f'Dataset size {dataset_size} and '
                             f'sink_size {loop_size} are not matched.')
        sink_count = math.ceil(dataset_size / loop_size) * 2
    self.sink_count = sink_count
    ms_role = os.getenv("MS_ROLE")
    if ms_role in ("MS_PSERVER", "MS_SCHED"):
        # Parameter-server and scheduler roles perform a single sink pass.
        self.sink_count = 1
    # for self._parallel_mode equal to semi_auto_parallel or auto_parallel, and not using full_batch,
    # use a complete tensor to compile, and slice tensor to run. The batch dimension of tensors for
    # compile is device_number times the batch dimension of tensors for run. Now only support LoopSink.
    if _need_to_full():
        device_num = _get_device_num()
        self.dataset_shapes = _to_full_shapes(self.dataset_shapes, device_num)

    def op():
        return tuple()
    self.op = op
def __init__(self, dataset, iter_first_order):
    """Initialize the MS loop-sink iterator.

    Args:
        dataset: Source dataset; expected to expose ``__loop_size__``
            and ``get_dataset_size()``.
        iter_first_order: Extra first-order iterations added to the loop size.
    """
    # Zero-argument super() — consistent with the sibling __init__ variants
    # in this file; the explicit (class, self) form is a Python-2 holdover.
    super().__init__(dataset)
    loop_size = dataset.__loop_size__ + iter_first_order
    # NOTE(review): float division then int-truncation — assumes the dataset
    # size divides evenly by loop_size; verify against caller.
    self.loop_count = int(dataset.get_dataset_size() / loop_size * 2)
    # for self._parallel_mode equal to semi_auto_parallel or auto_parallel, use a complete tensor to
    # compile, and slice tensor to run. The batch dimension of tensors for compile is device_number
    # times the batch dimension of tensors for run. Now only support LoopSink.
    if _get_parallel_mode() in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
        device_num = _get_device_num()
        self.dataset_shapes = _to_full_shapes(self.dataset_shapes, device_num)

    def op():
        return tuple()
    self.op = op
def __init__(self, dataset, sink_size, epoch_num, iter_first_order):
    """Set up the sink iterator, delegating sink-count math to get_sink_count.

    Args:
        dataset: Source dataset.
        sink_size: Number of steps sunk to device per pass.
        epoch_num: Total number of epochs to run.
        iter_first_order: Extra first-order iterations folded into the
            sink-count computation.
    """
    super().__init__(dataset, sink_size, epoch_num)
    self.sink_count = self.get_sink_count(dataset, sink_size, iter_first_order)
    # Parameter-server and scheduler roles always use a single sink pass.
    if os.getenv("MS_ROLE") in ("MS_PSERVER", "MS_SCHED"):
        self.sink_count = 1
    # for self._parallel_mode equal to semi_auto_parallel or auto_parallel, and not using full_batch,
    # use a complete tensor to compile, and slice tensor to run. The batch dimension of tensors for
    # compile is device_number times the batch dimension of tensors for run. Now only support LoopSink.
    if _need_to_full():
        self.dataset_shapes = _to_full_shapes(self.dataset_shapes, _get_device_num())

    def op():
        return tuple()

    self.op = op
def test_to_full_shapes():
    """_to_full_shapes scales each shape's leading dimension by device_num."""
    num_devices = 16
    local_shapes = [[32, 128], [12], [24, 1, 12]]
    expected = [(512, 128), (192,), (384, 1, 12)]
    assert _to_full_shapes(local_shapes, num_devices) == expected