def test_dataloader_context():
    """Check that DataLoader batches land in the expected CPU context.

    Covers three configurations: plain (non-pinned) memory, pinned
    memory on the default device id, and pinned memory on a custom
    device id (only exercised when at least two GPUs are present).
    """
    data = np.random.uniform(size=(10, 20))
    arr_dataset = gluon.data.ArrayDataset(data)
    default_dev_id = 0
    custom_dev_id = 1

    # Non-pinned memory: batches should be in the ordinary CPU context.
    plain_loader = gluon.data.DataLoader(arr_dataset, 8)
    for batch in plain_loader:
        assert batch.context == context.cpu(default_dev_id)

    # Pinned memory with the default device id.
    pinned_loader = gluon.data.DataLoader(arr_dataset, 8, pin_memory=True)
    for batch in pinned_loader:
        assert batch.context == context.cpu_pinned(default_dev_id)

    if mx.device.num_gpus() <= 1:
        print('Bypassing custom_dev_id pinned mem test on system with < 2 gpus.')
    else:
        # Pinned memory with an explicit, non-default device id.
        custom_loader = gluon.data.DataLoader(
            arr_dataset, 8, pin_memory=True, pin_device_id=custom_dev_id)
        for batch in custom_loader:
            assert batch.context == context.cpu_pinned(custom_dev_id)
def _same_process_iter():
    """Yield batchified results in the current process (no workers).

    Supports both sharded batches (the batch is a list/tuple of index
    shards) and flat index batches; results are optionally moved to
    pinned CPU memory.
    """
    for batch in self._batch_sampler:
        sharded = isinstance(batch[0], (list, tuple))
        if sharded:
            outputs = [self._batchify_fn([self._dataset[i] for i in shard])
                       for shard in batch]
            if self._pin_memory:
                outputs = [_as_in_context(out, context.cpu_pinned())
                           for out in outputs]
            yield outputs
        else:
            output = self._batchify_fn([self._dataset[i] for i in batch])
            if self._pin_memory:
                output = _as_in_context(output, context.cpu_pinned())
            yield output
def __next__(self):
    """Return the next batch from worker results, preserving send order.

    Pushes a new task into the worker pool (unless the `_sample_times`
    budget is exhausted), then blocks on the result for the next
    expected index. Raises StopIteration once every sent batch has
    been received.
    """
    self._iters += 1
    # Keep the pipeline full; a falsy _sample_times means "no limit".
    if not self._sample_times or self._iters < self._sample_times:
        self._push_next()
    if self._rcvd_idx == self._sent_idx:
        assert not self._data_buffer, "Data buffer should be empty at this moment"
        raise StopIteration
    assert self._rcvd_idx < self._sent_idx, "rcvd_idx must be smaller than sent_idx"
    assert self._rcvd_idx in self._data_buffer, "fatal error with _push_next, rcvd_idx missing"
    ret = self._data_buffer.pop(self._rcvd_idx)
    try:
        # When no dataset handle is kept locally, workers ship pickled batches.
        if self._dataset is None:
            batch = pickle.loads(ret.get(self._timeout))
        else:
            batch = ret.get(self._timeout)
        if self._pin_memory:
            batch = _as_in_context(batch, context.cpu_pinned(self._pin_device_id))
        # Unwrap single-element batches for caller convenience.
        batch = batch[0] if len(batch) == 1 else batch
        self._rcvd_idx += 1
        return batch
    except multiprocessing.context.TimeoutError:
        msg = '''Worker timed out after {} seconds. This might be caused by \n - Slow transform. Please increase timeout to allow slower data loading in each worker. '''.format(self._timeout)
        # Shared-memory pressure only applies to process-based pools.
        if not isinstance(self._worker_pool, multiprocessing.pool.ThreadPool):
            msg += '''- Insufficient shared_memory if `timeout` is large enough. Please consider reduce `num_workers` or increase shared_memory in system. '''
        print(msg)
        raise
    except Exception:
        # Any other worker failure is fatal: tear down the pool and propagate.
        self._worker_pool.terminate()
        raise
def same_process_iter():
    """Generate batchified samples in-process, without worker processes."""
    for indices in self._batch_sampler:
        samples = [self._dataset[i] for i in indices]
        result = self._batchify_fn(samples)
        if self._pin_memory:
            result = _as_in_context(result, context.cpu_pinned())
        yield result
def fetcher_loop(data_queue, data_buffer, pin_memory=False):
    """Fetcher loop for fetching data from queue and put in reorder dict."""
    while True:
        idx, batch = data_queue.get()
        if idx is None:
            # Sentinel from the producer: no more batches, stop fetching.
            return
        # Move the batch to pinned or plain CPU memory before buffering.
        target = context.cpu_pinned() if pin_memory else context.cpu()
        data_buffer[idx] = _as_in_context(batch, target)
def _same_process_iter():
    """Iterate batches in the calling process, batchifying each one.

    A batch whose first element is a list/tuple is treated as a set of
    shards and batchified shard-by-shard; otherwise it is a flat list
    of sample indices. Results may be pinned in CPU memory.
    """
    for batch in self._batch_sampler:
        if not isinstance(batch[0], (list, tuple)):
            # Flat batch of indices.
            single = self._batchify_fn([self._dataset[idx] for idx in batch])
            if self._pin_memory:
                single = _as_in_context(single, context.cpu_pinned())
            yield single
            continue
        # Sharded batch: batchify every shard independently.
        shards = [self._batchify_fn([self._dataset[idx] for idx in shard])
                  for shard in batch]
        if self._pin_memory:
            shards = [_as_in_context(shard, context.cpu_pinned())
                      for shard in shards]
        yield shards
def test_dataloader_context():
    """Check that DataLoader batches land in the expected CPU context.

    Covers non-pinned memory, pinned memory with the default device id,
    and pinned memory with a custom device id. The custom-device case
    is skipped on hosts with fewer than two GPUs — pinning against a
    non-existent device id would otherwise fail there (matches the
    guarded variant of this test elsewhere in the suite).
    """
    X = np.random.uniform(size=(10, 20))
    dataset = gluon.data.ArrayDataset(X)
    default_dev_id = 0
    custom_dev_id = 1
    # use non-pinned memory
    loader1 = gluon.data.DataLoader(dataset, 8)
    for _, x in enumerate(loader1):
        assert x.context == context.cpu(default_dev_id)
    # use pinned memory with default device id
    loader2 = gluon.data.DataLoader(dataset, 8, pin_memory=True)
    for _, x in enumerate(loader2):
        assert x.context == context.cpu_pinned(default_dev_id)
    if mx.device.num_gpus() <= 1:
        # Fix: previously this test ran unconditionally and failed on
        # single-GPU/CPU-only systems where custom_dev_id is invalid.
        print('Bypassing custom_dev_id pinned mem test on system with < 2 gpus.')
    else:
        # use pinned memory with custom device id
        loader3 = gluon.data.DataLoader(dataset, 8, pin_memory=True,
                                        pin_device_id=custom_dev_id)
        for _, x in enumerate(loader3):
            assert x.context == context.cpu_pinned(custom_dev_id)
def __next__(self):
    """Fetch the next in-order batch; raise StopIteration when drained."""
    self._push_next()
    if self._rcvd_idx == self._sent_idx:
        assert not self._data_buffer, 'Data buffer should be empty at this moment'
        raise StopIteration
    assert self._rcvd_idx < self._sent_idx, 'rcvd_idx must be smaller than sent_idx'
    assert self._rcvd_idx in self._data_buffer, 'fatal error with _push_next, rcvd_idx missing'
    pending = self._data_buffer.pop(self._rcvd_idx)
    if self._dataset is None:
        # Workers without a shared dataset handle return pickled batches.
        batch = pickle.loads(pending.get())
    else:
        batch = pending.get()
    if self._pin_memory:
        batch = _as_in_context(batch, context.cpu_pinned())
    self._rcvd_idx += 1
    return batch
def __next__(self):
    """Return the next batch in send order, unwrapping singleton batches."""
    self._push_next()
    if self._rcvd_idx == self._sent_idx:
        assert not self._data_buffer, "Data buffer should be empty at this moment"
        raise StopIteration
    assert self._rcvd_idx < self._sent_idx, "rcvd_idx must be smaller than sent_idx"
    assert self._rcvd_idx in self._data_buffer, "fatal error with _push_next, rcvd_idx missing"
    pending = self._data_buffer.pop(self._rcvd_idx)
    raw = pending.get()
    # Workers without a shared dataset handle return pickled batches.
    batch = pickle.loads(raw) if self._dataset is None else raw
    if self._pin_memory:
        batch = _as_in_context(batch, context.cpu_pinned(self._pin_device_id))
    # Single-element batches are unwrapped for caller convenience.
    if len(batch) == 1:
        batch = batch[0]
    self._rcvd_idx += 1
    return batch
def _same_process_iter():
    """Yield batches by loading url-backed datasets in the current process.

    Collects up to `_circle_length` urls at a time, materializes a
    dataset plus batch sampler for each group via `_dataset_worker_fn`,
    then batchifies and yields every batch from that group.
    """
    urls = []
    # Initially the list of urls drawn from the file sampler.
    dataset = [self._dataset[i] for i in iter(self._file_sampler)]
    for i, url in enumerate(dataset):
        urls.append(url)
        if i < len(dataset) - 1:
            # Not at the end: keep collecting until a full circle of urls.
            # NOTE(review): `dataset` is rebound below, so on later
            # iterations len(dataset) refers to the loaded dataset, not
            # the url list (enumerate still walks the original list) —
            # confirm this is intended.
            if len(urls) < self._circle_length:
                continue
        if self._circle_length == 1:
            # Single-url groups are passed as a bare url, not a list.
            urls = urls[0]
        dataset, batch_sampler = _dataset_worker_fn(urls, self._dataset_fn, self._batch_sampler_fn)
        for batch in batch_sampler:
            ret = self._batchify_fn([dataset[idx] for idx in batch])
            if self._pin_memory:
                ret = _as_in_context(ret, context.cpu_pinned())
            yield ret
        urls = []
def fetcher_loop_v1(data_queue, data_buffer, pin_memory=False, pin_device_id=0, data_buffer_lock=None):
    """Fetcher loop for fetching data from queue and put in reorder dict."""
    while True:
        idx, batch = data_queue.get()
        if idx is None:
            # Sentinel from the producer: no more batches, stop fetching.
            break
        # Move the batch to pinned or plain CPU memory before buffering.
        if pin_memory:
            destination = context.cpu_pinned(pin_device_id)
        else:
            destination = context.cpu()
        batch = _as_in_context(batch, destination)
        # Guard the reorder dict with the lock when one is supplied.
        if data_buffer_lock is None:
            data_buffer[idx] = batch
        else:
            with data_buffer_lock:
                data_buffer[idx] = batch