def load_chunks(self, dev, dataloader):
    try:
        indices = dataloader._gather_indices_for_dev(dev)
        itr = iter(dataloader.data.to_iter(indices=indices))
        with dataloader._get_device_ctx(dev):
            spill = None
            for chunks in self.batch(itr):
                if self.stopped:
                    return

                # prepend any rows spilled from the previous iteration
                if spill is not None and not spill.empty:
                    chunks.insert(0, spill)

                chunks = cudf.core.reshape.concat(chunks)
                chunks.reset_index(drop=True, inplace=True)
                chunks, spill = self.get_batch_div_chunk(chunks, dataloader.batch_size)
                if self.shuffle:
                    _shuffle_gdf(chunks)

                if len(chunks) > 0:
                    chunks = dataloader.make_tensors(chunks, dataloader._use_nnz)
                    # put returns True if the buffer is stopped before
                    # the packet can be put in the queue. Keeps us from
                    # freezing on a put on a full queue.
                    if self.put(chunks):
                        return
                chunks = None

            # takes care of the final batch, which is smaller than the batch size
            if spill is not None and not spill.empty:
                spill = dataloader.make_tensors(spill, dataloader._use_nnz)
                self.put(spill)
    except Exception as e:
        self.put(e)
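
# A minimal sketch of what `get_batch_div_chunk` is assumed to do in the loop
# above: split the concatenated cudf DataFrame into a head whose length is an
# exact multiple of `batch_size`, plus a "spill" remainder that is carried into
# the next iteration (and flushed after the loop). This body is an assumption
# for illustration, not necessarily the library's implementation; it relies on
# `import cudf` at module scope, just as the caller above does.
def get_batch_div_chunk(self, chunks, batch_size):
    # largest row count that is an exact multiple of batch_size
    spill_idx = (len(chunks) // batch_size) * batch_size
    spill = chunks.iloc[spill_idx:].reset_index(drop=True)
    chunks = chunks.iloc[:spill_idx].reset_index(drop=True)
    return chunks, spill
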
def load_chunks(self, dev, dataloader):
    try:
        indices = dataloader._gather_indices_for_dev(dev)
        itr = iter(dataloader.data.to_iter(indices=indices))
        with dataloader._get_device_ctx(dev):
            spill = None
            for chunks in self.batch(itr):
                if self.stopped:
                    return

                # prepend any rows spilled from the previous iteration
                if spill is not None and not spill.empty:
                    chunks.insert(0, spill)

                chunks = cudf.core.reshape.concat(chunks)
                chunks.reset_index(drop=True, inplace=True)
                chunks, spill = self.get_batch_div_chunk(chunks, dataloader.batch_size)
                if self.shuffle:
                    _shuffle_gdf(chunks)

                num_samples = len(chunks)
                if num_samples > 0:
                    for workflow in dataloader.workflows:
                        chunks = workflow.apply_ops(chunks)

                    # map from the big chunk to framework-specific tensors
                    chunks = dataloader._create_tensors(chunks)

                    # split them into batches and map to
                    # the framework-specific output format
                    chunks = [
                        dataloader._create_batch(x, num_samples) for x in chunks
                    ]
                    chunks = zip(*chunks)
                    chunks = [
                        dataloader._handle_tensors(*tensors) for tensors in chunks
                    ]

                    # put returns True if the buffer is stopped before
                    # the packet can be put in the queue. Keeps us from
                    # freezing on a put on a full queue.
                    if self.put(chunks):
                        return
                chunks = None

            # takes care of the final batch, which is smaller than the batch size
            if spill is not None and not spill.empty:
                for workflow in dataloader.workflows:
                    spill = workflow.apply_ops(spill)
                spill = dataloader._create_tensors(spill)
                spill = dataloader._handle_tensors(*spill)
                self.put([spill])
    except Exception as e:
        self.put(e)
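
# A minimal sketch of the `self.batch(itr)` generator both versions iterate over:
# it is assumed to group the dataset parts yielded by `itr` into lists of a fixed
# size (here called `self.num_parts`, an assumed attribute name) so the caller can
# concatenate each list into one large chunk. Illustration only, not the actual
# implementation.
def batch(self, itr):
    current = []
    for value in itr:
        current.append(value)
        if len(current) == self.num_parts:
            yield current
            current = []
    # flush whatever is left so the tail of the dataset is not dropped
    if current:
        yield current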