def test_batching_no_remainder(self):
    """Check generate_batches when batch_size divides num_records evenly."""
    # Each case: (num_records, batch_size, expected (size, index) pairs).
    cases = (
        (20, 10, [(10, 0), (10, 1)]),
        (20, 5, [(5, 0), (5, 1), (5, 2), (5, 3)]),
        (3, 1, [(1, 0), (1, 1), (1, 2)]),
        (3, 3, [(3, 0)]),
    )
    for total, size, expected in cases:
        produced = list(generate_batches(num_records=total, batch_size=size))
        assert produced == expected
def _run_task(self):
    """Generate every data batch inside a ``temporary_dir()`` context.

    Iterates the ``(batch_size, index)`` pairs yielded by
    ``generate_batches(self.num_records, self.batch_size)`` and hands each
    one to ``self._generate_batch`` together with the database URL, the
    directory bound by the context manager, and the mapping file.
    """
    with temporary_dir() as tempdir:
        batch_plan = generate_batches(self.num_records, self.batch_size)
        for batch_len, batch_no in batch_plan:
            self._generate_batch(
                self.database_url,
                tempdir,
                self.mapping_file,
                batch_len,
                batch_no,
            )
def _run_task(self):
    """Generate every data batch, logging progress for each one.

    Iterates the ``(batch_size, index)`` pairs yielded by
    ``generate_batches(self.num_records, self.batch_size)``. Each batch is
    passed to ``self._generate_batch`` with ``self.debug_dir`` as the
    directory argument when it is truthy, otherwise a temporary directory
    that is removed when the task finishes.
    """
    with TemporaryDirectory() as tempdir:
        batch_plan = generate_batches(self.num_records, self.batch_size)
        for batch_len, batch_no in batch_plan:
            progress = (
                f"Generating a data batch, batch_size={batch_len} "
                f"index={batch_no} total_records={self.num_records}"
            )
            self.logger.info(progress)
            self._generate_batch(
                self.database_url,
                self.debug_dir or tempdir,
                self.mapping_file,
                batch_len,
                batch_no,
            )
def _run_task(self):
    """Generate every data batch, logging progress for each one.

    Iterates the ``(batch_size, index)`` pairs yielded by
    ``generate_batches(self.num_records, self.batch_size)`` and passes each
    to ``self._generate_batch``.

    The directory argument is chosen as follows:

    * ``self.working_directory`` when that attribute exists and is truthy;
    * otherwise the ``"working_directory"`` option as a ``Path`` (created
      on demand, including missing parent directories);
    * otherwise a temporary directory removed when the task finishes.
    """
    with TemporaryDirectory() as tempdir:
        working_directory = self.options.get("working_directory")
        if working_directory:
            tempdir = Path(working_directory)
            # parents=True: a nested path such as "out/run1" must not fail
            # just because "out" does not exist yet.
            tempdir.mkdir(parents=True, exist_ok=True)
        for current_batch_size, index in generate_batches(
            self.num_records, self.batch_size
        ):
            self.logger.info(
                f"Generating a data batch, batch_size={current_batch_size} "
                f"index={index} total_records={self.num_records}"
            )
            # Tolerate the attribute never having been set; when it is set,
            # it keeps the same precedence over the resolved directory.
            output_dir = getattr(self, "working_directory", None) or tempdir
            self._generate_batch(
                self.database_url,
                output_dir,
                self.mapping_file,
                current_batch_size,
                index,
            )
def test_batching_with_remainder(self):
    """Check generate_batches when batch_size does not divide num_records."""
    # 20 records in batches of 7: two full batches, then a final batch of 6.
    expected = [(7, 0), (7, 1), (6, 2)]
    produced = list(generate_batches(num_records=20, batch_size=7))
    assert produced == expected