Ejemplo n.º 1
0
    def load_chunks(self, dev):
        """Read batched chunks, convert them to tensors and queue them.

        Iterates ``self.batch(iter(self.itr))`` inside the device context
        returned by ``self.dataloader._get_device_ctx(dev)``. For every batch
        of chunks the leftover rows from the previous iteration are prepended,
        the chunks are concatenated, and ``self.get_batch_div_chunk`` splits
        the result into the part to emit now and a new leftover ("spill",
        presumably the rows that do not fill a whole batch -- confirm against
        ``get_batch_div_chunk``). Emitted parts are turned into tensors with
        ``self.dataloader.make_tensors`` and handed to ``self.put``.

        Args:
            dev: Device identifier forwarded to
                ``self.dataloader._get_device_ctx``.

        Returns:
            None. The method returns early when ``self.stopped`` is set or
            when ``self.put`` reports that the buffer was stopped.

        Any exception raised while loading is not propagated; it is passed to
        ``self.put`` instead so the consumer of the queue can see it.
        """
        try:
            itr = iter(self.itr)
            with self.dataloader._get_device_ctx(dev):
                spill = None
                for chunks in self.batch(itr):
                    if self.stopped:
                        return

                    # Use an identity check rather than truth-testing the
                    # frame: pandas-style DataFrames reject ``bool(df)`` as
                    # ambiguous, which would abort loading on the second batch.
                    if spill is not None and not spill.empty:
                        chunks.insert(0, spill)

                    chunks = cudf.core.reshape.concat(chunks)
                    chunks.reset_index(drop=True, inplace=True)
                    chunks, spill = self.get_batch_div_chunk(chunks, self.dataloader.batch_size)
                    if self.shuffle:
                        # Keep the helper's result: if it returns a new frame
                        # instead of shuffling in place, discarding it would
                        # silently skip the shuffle. A ``None`` result means
                        # the helper worked in place, so ``chunks`` is kept.
                        shuffled = _shuffle_df(chunks)
                        if shuffled is not None:
                            chunks = shuffled

                    if len(chunks) > 0:
                        chunks = self.dataloader.make_tensors(chunks, self.dataloader._use_nnz)
                        # put returns True if buffer is stopped before
                        # packet can be put in queue. Keeps us from
                        # freezing on a put on a full queue
                        if self.put(chunks):
                            return
                    # Drop the reference before the next batch is fetched.
                    chunks = None

                # Takes care of the final batch, which is smaller than the batch size.
                if not self.dataloader.drop_last and spill is not None and not spill.empty:
                    spill = self.dataloader.make_tensors(spill, self.dataloader._use_nnz)
                    self.put(spill)
        except Exception as e:
            # Forward the failure through the queue instead of raising here.
            self.put(e)
Ejemplo n.º 2
0
    def chunk_logic(self, itr):
        """Turn batched chunks from ``itr`` into tensors and queue them.

        For each group produced by ``self.batch(itr)`` the rows left over from
        the previous group are prepended, the group is concatenated and
        re-indexed, and ``self.get_batch_div_chunk`` splits it into the part
        to emit now and a new remainder. The emitted part is optionally
        shuffled, converted with ``self.dataloader.make_tensors`` and passed
        to ``self.put``. After the loop a non-empty remainder is emitted as
        well unless ``self.dataloader.drop_last`` is set.

        Returns early (with ``None``) when ``self.stopped`` is set or when
        ``self.put`` returns a truthy value.
        """
        remainder = None
        for data in self.batch(itr):
            if self.stopped:
                return

            carry_over = remainder is not None and not remainder.empty
            if carry_over:
                data.insert(0, remainder)

            # The same name is rebound at every step so intermediate frames
            # become unreferenced as soon as they are superseded.
            data = _concat(data)
            data.reset_index(drop=True, inplace=True)
            data, remainder = self.get_batch_div_chunk(data, self.dataloader.batch_size)
            if self.shuffle:
                data = _shuffle_df(data)

            if len(data) == 0:
                data = None
                continue

            data = self.dataloader.make_tensors(data, self.dataloader._use_nnz)
            # A truthy result from put means the buffer was stopped before the
            # packet could be queued; bail out rather than block on a full queue.
            if self.put(data):
                return
            data = None

        # Final partial batch (smaller than the batch size), if it is wanted.
        if self.dataloader.drop_last or remainder is None or remainder.empty:
            return
        remainder = self.dataloader.make_tensors(remainder, self.dataloader._use_nnz)
        self.put(remainder)