Example #1
0
    def __iter__(self):
        """Yield a ``Batch`` for each chunk of the input not yet processed.

        The input is opened through ``self.csv_input_file_reader()`` and
        wrapped in ``FastReader`` or ``SlowReader`` depending on
        ``self.fast_mode``.  Rows are pulled in chunks of ``self.chunksize``
        via ``iter_chunks``.  A chunk is skipped when its
        ``(starting row offset, row count)`` pair is found in
        ``self.already_processed_batches``; the offset still advances so
        later batches keep their position.

        Once the input is exhausted, the total row count and the elapsed
        time are reported through ``self._ui.info``.

        Raises:
            ValueError: if the reader produced no chunks at all.
        """
        make_reader = FastReader if self.fast_mode else SlowReader

        with self.csv_input_file_reader() as csvfile:
            reader = make_reader(csvfile, self.encoding, self._ui)
            header = reader.fieldnames

            started_at = time()
            offset = 0
            saw_chunk = False
            for rows in iter_chunks(reader, self.chunksize):
                saw_chunk = True
                size = len(rows)
                # Batches are identified by where they start and how many
                # rows they hold.
                batch_key = (offset, size)
                if batch_key not in self.already_processed_batches:
                    yield Batch(offset, size, header, rows, self.rty_cnt)
                offset += size

            if not saw_chunk:
                message = "Input file '{}' is empty.".format(self.dataset)
                raise ValueError(message)

            self._ui.info(
                'chunking {} rows took {}'.format(offset, time() - started_at))
Example #2
0
def test_iter_chunks():
    """Check that ``iter_chunks`` splits three rows into chunks of size two.

    Three rows with a chunk size of 2 should give one full chunk, one
    trailing chunk holding the remaining row, and then exhaustion.
    """
    rows = [[1, 'a'], [2, 'b'], [3, 'c']]
    expected_first = [[1, 'a'], [2, 'b']]
    expected_second = [[3, 'c']]

    chunks = iter_chunks(rows, 2)

    first = next(chunks)
    assert expected_first == first

    second = next(chunks)
    assert expected_second == second

    # Nothing is left, so one more ``next`` must raise StopIteration.
    with pytest.raises(StopIteration):
        next(chunks)