def avro_to_s3(self, results_iter, results_schema):
    """Attempt to serialize a result set to an Avro file.

    Returns True if it completely writes the entire results_iter, and
    False if there were records remaining when it hit the maximum file
    size.
    """
    with BytesIO() as f:
        complete, row_count = write_avro_file(
            f,
            results_iter,
            results_schema,
            self.destination_table_name,
            self.max_file_size,
        )

        if self.row_count is None:
            self.row_count = row_count
        else:
            self.row_count += row_count
        self.upload_size += f.tell()

        if not complete:
            # The iterator was not drained, so the load spans multiple
            # files and requires a manifest.
            self.manifest_mode = True

        if row_count > 0:
            self._upload_s3(
                f, get_redshift().s3_config.bucket, self.next_s3_data_file_key()
            )
            self.num_data_files += 1

    return complete
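# write_avro_file is referenced above but not shown in this excerpt. The
# following is a minimal sketch of the contract the caller and tests rely on,
# assuming fastavro's incremental Writer; the schema construction, parameter
# names, and size check are illustrative assumptions, not the project's actual
# implementation. (It would live alongside avro_to_s3 at module level, not
# inside the class.)
from fastavro.write import Writer


def write_avro_file(f, results_iter, fields, table_name, max_size):
    """Write records to f until the iterator is drained or f exceeds max_size.

    Returns (complete, row_count): complete is True when every record was
    written, False when the size cap stopped the write early.
    """
    schema = {"type": "record", "name": table_name, "fields": fields}
    writer = Writer(f, schema)

    complete = True
    row_count = 0
    for row in results_iter:
        writer.write(row)
        row_count += 1
        # Flush so f.tell() reflects what has actually been serialized.
        writer.flush()
        if f.tell() > max_size:
            complete = False
            break

    writer.flush()
    return complete, row_count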
def test_write_avro_increment_full(self):
    data = itertools.repeat({"a": 1, "b": "foo"}, 10)
    with BytesIO() as f:
        # write_avro_file returns (complete, row_count); unpack it so the
        # assertion checks the flag rather than the truthiness of a tuple.
        complete, nrows = write_avro_file(f, data, self.fields, "tbl", 1024)

    # method should return True if we fully drain the iterator
    self.assertTrue(complete)
    # confirm we read to the end of the iterator
    remaining = len(list(data))
    self.assertEqual(remaining, 0)
def test_write_avro_increment_partial(self):
    data = itertools.repeat({"a": 1, "b": "foo"}, 10000)
    with BytesIO() as f:
        complete, nrows = write_avro_file(f, data, self.fields, "tbl", 1024)

    remaining = len(list(data))
    # method should return False if there are elements remaining
    self.assertFalse(complete)
    # confirm we drained some elements from the iterator
    self.assertLess(remaining, 10000)
    # confirm we logged some rows as being consumed
    self.assertGreater(nrows, 0)
    # confirm total rows minus consumed rows equals the number remaining
    self.assertEqual(remaining, 10000 - nrows)
    # meta: confirm there are in fact elements remaining
    self.assertGreater(remaining, 0)
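# A hypothetical scaffold for the two tests above, showing the imports and the
# self.fields schema they assume. The import path and class name are
# illustrative; adjust them to wherever write_avro_file actually lives.
import itertools
import unittest
from io import BytesIO

from redshift_loader import write_avro_file  # assumed module path


class WriteAvroFileTest(unittest.TestCase):
    def setUp(self):
        # Avro record fields matching the {"a": ..., "b": ...} rows used in
        # the tests.
        self.fields = [
            {"name": "a", "type": "long"},
            {"name": "b", "type": "string"},
        ]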