def load_chunks(self, dev, dataloader):
    try:
        indices = dataloader._gather_indices_for_dev(dev)
        itr = iter(dataloader.data.to_iter(indices=indices))
        with dataloader._get_device_ctx(dev):
            spill = None
            for chunks in self.batch(itr):
                if self.stopped:
                    return

                # prepend any rows spilled from the previous iteration
                if spill is not None and not spill.empty:
                    chunks.insert(0, spill)

                chunks = cudf.core.reshape.concat(chunks)
                chunks.reset_index(drop=True, inplace=True)
                chunks, spill = self.get_batch_div_chunk(chunks, dataloader.batch_size)
                if self.shuffle:
                    _shuffle_gdf(chunks)

                if len(chunks) > 0:
                    chunks = dataloader.make_tensors(chunks, dataloader._use_nnz)
                    # put returns True if the buffer is stopped before
                    # the packet can be put in the queue. Keeps us from
                    # freezing on a put on a full queue.
                    if self.put(chunks):
                        return
                chunks = None

            # takes care of the final batch, which is smaller than the batch size
            if spill is not None and not spill.empty:
                spill = dataloader.make_tensors(spill, dataloader._use_nnz)
                self.put(spill)
    except Exception as e:
        self.put(e)
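
# A minimal sketch of what `get_batch_div_chunk` is assumed to do in the loop
# above: split the concatenated cudf DataFrame into a head whose length is an
# exact multiple of `batch_size`, plus a "spill" remainder that is carried into
# the next iteration (and flushed after the loop). This body is an assumption
# for illustration, not necessarily the library's implementation; it relies on
# `import cudf` at module scope, just as the caller above does.
def get_batch_div_chunk(self, chunks, batch_size):
    # largest row count that is an exact multiple of batch_size
    spill_idx = (len(chunks) // batch_size) * batch_size
    spill = chunks.iloc[spill_idx:].reset_index(drop=True)
    chunks = chunks.iloc[:spill_idx].reset_index(drop=True)
    return chunks, spill
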
def load_chunks(self, dev, dataloader):
    try:
        indices = dataloader._gather_indices_for_dev(dev)
        itr = iter(dataloader.data.to_iter(indices=indices))
        with dataloader._get_device_ctx(dev):
            spill = None
            for chunks in self.batch(itr):
                if self.stopped:
                    return

                # prepend any rows spilled from the previous iteration
                if spill is not None and not spill.empty:
                    chunks.insert(0, spill)

                chunks = cudf.core.reshape.concat(chunks)
                chunks.reset_index(drop=True, inplace=True)
                chunks, spill = self.get_batch_div_chunk(chunks, dataloader.batch_size)
                if self.shuffle:
                    _shuffle_gdf(chunks)

                num_samples = len(chunks)
                if num_samples > 0:
                    for workflow in dataloader.workflows:
                        chunks = workflow.apply_ops(chunks)

                    # map from the big chunk to framework-specific tensors
                    chunks = dataloader._create_tensors(chunks)

                    # split them into batches and map to
                    # the framework-specific output format
                    chunks = [
                        dataloader._create_batch(x, num_samples) for x in chunks
                    ]
                    chunks = zip(*chunks)
                    chunks = [
                        dataloader._handle_tensors(*tensors) for tensors in chunks
                    ]

                    # put returns True if the buffer is stopped before
                    # the packet can be put in the queue. Keeps us from
                    # freezing on a put on a full queue.
                    if self.put(chunks):
                        return
                chunks = None

            # takes care of the final batch, which is smaller than the batch size
            if spill is not None and not spill.empty:
                for workflow in dataloader.workflows:
                    spill = workflow.apply_ops(spill)
                spill = dataloader._create_tensors(spill)
                spill = dataloader._handle_tensors(*spill)
                self.put([spill])
    except Exception as e:
        self.put(e)
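
# A minimal sketch of the `self.batch(itr)` generator both versions iterate over:
# it is assumed to group the dataset parts yielded by `itr` into lists of a fixed
# size (here called `self.num_parts`, an assumed attribute name) so the caller can
# concatenate each list into one large chunk. Illustration only, not the actual
# implementation.
def batch(self, itr):
    current = []
    for value in itr:
        current.append(value)
        if len(current) == self.num_parts:
            yield current
            current = []
    # flush whatever is left so the tail of the dataset is not dropped
    if current:
        yield current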