コード例 #1
0
ファイル: table.py プロジェクト: gc-ss/druzhba
    def avro_to_s3(self, results_iter, results_schema):
        """Serialize rows from ``results_iter`` into one Avro file and upload it.

        The file is built in an in-memory buffer by ``write_avro_file``, which
        is given ``self.max_file_size`` as its size limit. As a side effect the
        running totals ``row_count``, ``upload_size`` and ``num_data_files``
        are updated, and ``manifest_mode`` is switched on when the result set
        was not fully written.

        Returns True if the entire ``results_iter`` was written, and False if
        there were records remaining when the maximum file size was hit.
        """
        with BytesIO() as buffer:
            finished, rows_written = write_avro_file(
                buffer,
                results_iter,
                results_schema,
                self.destination_table_name,
                self.max_file_size,
            )

            # row_count may still be None, in which case this batch seeds it.
            previous_rows = self.row_count
            self.row_count = (
                rows_written
                if previous_rows is None
                else previous_rows + rows_written
            )

            # The buffer position after writing is used as the file's size.
            # NOTE: this is added even when no rows were written and nothing
            # is uploaded below.
            self.upload_size += buffer.tell()

            # Rows are left over, so flag manifest mode before the next data
            # file key is requested.
            if not finished:
                self.manifest_mode = True

            # Only upload files that actually contain rows.
            if rows_written > 0:
                bucket = get_redshift().s3_config.bucket
                data_file_key = self.next_s3_data_file_key()
                self._upload_s3(buffer, bucket, data_file_key)
                self.num_data_files += 1

        return finished
コード例 #2
0
ファイル: test_avro.py プロジェクト: steverit/druzhba
    def test_write_avro_increment_full(self):
        """A small iterator that fits under the size cap is written completely."""
        total = 10
        data = itertools.repeat({"a": 1, "b": "foo"}, total)

        with BytesIO() as f:
            # write_avro_file returns a (complete, row_count) pair. Unpack it so
            # the flag itself is checked: asserting on the whole tuple would
            # always pass, because a non-empty tuple is truthy.
            complete, nrows = write_avro_file(f, data, self.fields, "tbl", 1024)

        # method should return true if we fully drain the iterator
        self.assertTrue(complete)

        # confirm every row was logged as being consumed
        self.assertEqual(nrows, total)

        # confirm we read to the end of the iterator
        remaining = sum(1 for _ in data)
        self.assertEqual(remaining, 0)
コード例 #3
0
ファイル: test_avro.py プロジェクト: steverit/druzhba
    def test_write_avro_increment_partial(self):
        """An iterator that exceeds the size cap is only partially consumed."""
        total_rows = 10000
        row = {"a": 1, "b": "foo"}
        data = itertools.repeat(row, total_rows)

        with BytesIO() as buf:
            finished, consumed = write_avro_file(
                buf, data, self.fields, "tbl", 1024
            )

        # Whatever the writer did not pull is still waiting in the iterator.
        leftover = len(list(data))

        # the writer must report that it stopped before the end
        self.assertFalse(finished)

        # some, but not all, of the rows were taken from the iterator
        self.assertLess(leftover, total_rows)

        # the reported row count reflects that rows were consumed
        self.assertGreater(consumed, 0)

        # consumed rows plus leftover rows account for every input row
        self.assertEqual(leftover, total_rows - consumed)

        # meta: the input really was large enough to leave rows behind
        self.assertGreater(leftover, 0)