Esempio n. 1
0
 def test_read_all_from_parquet_file_pattern(self):
     # Feed one file pattern (produced by _write_pattern(5)) to both
     # read-all transforms and check each yields five copies of the
     # expected payload.
     pattern = self._write_pattern(5)

     # Record-oriented read: expect self.RECORDS repeated five times.
     with TestPipeline() as pipeline:
         rows = pipeline | Create([pattern]) | ReadAllFromParquet()
         assert_that(rows, equal_to(self.RECORDS * 5))

     # Batched read: expect five copies of the Arrow form of the records.
     with TestPipeline() as pipeline:
         batches = pipeline | Create([pattern]) | ReadAllFromParquetBatched()
         assert_that(batches, equal_to([self._records_as_arrow()] * 5))
Esempio n. 2
0
    def test_read_all_from_parquet_single_file(self):
        # Feed the single path returned by _write_data() to both read-all
        # transforms and check each yields exactly one copy of the
        # expected payload.
        single_path = self._write_data()

        # Record-oriented read: expect exactly self.RECORDS.
        with TestPipeline() as pipeline:
            rows = pipeline | Create([single_path]) | ReadAllFromParquet()
            assert_that(rows, equal_to(self.RECORDS))

        # Batched read: expect one element, the Arrow form of the records.
        with TestPipeline() as pipeline:
            batches = (
                pipeline
                | Create([single_path])
                | ReadAllFromParquetBatched())
            assert_that(batches, equal_to([self._records_as_arrow()]))
Esempio n. 3
0
 def test_read_all_from_parquet_many_file_patterns(self):
     # Three patterns are written with counts 5, 2 and 3; together the
     # reads are expected to produce 10 copies of the payload.
     patterns = [
         self._write_pattern(5),
         self._write_pattern(2),
         self._write_pattern(3),
     ]

     # Record-oriented read: expect self.RECORDS repeated ten times.
     with TestPipeline() as pipeline:
         rows = pipeline | Create(list(patterns)) | ReadAllFromParquet()
         assert_that(rows, equal_to(self.RECORDS * 10))

     # Batched read: expect ten copies of the Arrow form of the records.
     with TestPipeline() as pipeline:
         batches = (
             pipeline
             | Create(list(patterns))
             | ReadAllFromParquetBatched())
         assert_that(batches, equal_to([self._records_as_arrow()] * 10))
Esempio n. 4
0
 def test_read_all_from_parquet_many_single_files(self):
     # Three separate _write_data() calls give three paths; reading all
     # of them is expected to produce three copies of the payload.
     paths = [self._write_data() for _ in range(3)]

     # Record-oriented read: expect self.RECORDS repeated three times.
     with TestPipeline() as pipeline:
         rows = pipeline | Create(list(paths)) | ReadAllFromParquet()
         assert_that(rows, equal_to(self.RECORDS * 3))

     # Batched read: expect three copies of the Arrow form of the records.
     with TestPipeline() as pipeline:
         batches = (
             pipeline
             | Create(list(paths))
             | ReadAllFromParquetBatched())
         assert_that(batches, equal_to([self._records_as_arrow()] * 3))