Esempio n. 1
0
 def test_batched_read(self):
     """Write RECORDS to a Parquet file and read them back in batched form.

     The write pipeline emits ``self.RECORDS`` with ``self.SCHEMA`` using a
     single shard and an empty shard name template; the read pipeline then
     loads the same path with ``ReadFromParquetBatched`` and the result is
     expected to equal a one-element collection holding
     ``self._records_as_arrow()``.
     """
     with tempfile.NamedTemporaryFile() as tmp_file:
         target = tmp_file.name
         # NOTE(review): the tempfile docs state that reopening a
         # NamedTemporaryFile by name while it is still open is
         # platform-dependent -- confirm where this test is expected to run.
         with TestPipeline() as write_pipeline:
             records = write_pipeline | Create(self.RECORDS, reshuffle=False)
             _ = records | WriteToParquet(
                 target, self.SCHEMA, num_shards=1, shard_name_template='')
         with TestPipeline() as read_pipeline:
             batches = read_pipeline | ReadFromParquetBatched(target)
             assert_that(batches, equal_to([self._records_as_arrow()]))
Esempio n. 2
0
 def test_batched_read(self):
     """Write RECORDS to a Parquet file and read them back in batched form.

     ``self.RECORDS`` is written with ``self.SCHEMA`` as a single shard with
     an empty shard name template, then read back from the same path with
     ``ReadFromParquetBatched``. The readback is expected to equal a
     one-element collection holding ``self._records_as_arrow()``.
     """
     with TemporaryDirectory() as tmp_dirname:
         # Pass the directory and file name as separate components so the
         # file is created *inside* the temporary directory and is removed
         # together with it. Concatenating them first would yield a sibling
         # path such as "/tmp/tmpabcdtmp_filename" that is never cleaned up.
         path = os.path.join(tmp_dirname, "tmp_filename")
         with TestPipeline() as p:
             _ = (
                 p
                 | Create(self.RECORDS, reshuffle=False)
                 | WriteToParquet(
                     path, self.SCHEMA, num_shards=1, shard_name_template=''))
         with TestPipeline() as p:
             readback = p | ReadFromParquetBatched(path)
             assert_that(readback, equal_to([self._records_as_arrow()]))
Esempio n. 3
0
    def test_read_display_data(self):
        """Check the display data exposed by both Parquet read transforms.

        ``ReadFromParquet`` and ``ReadFromParquetBatched`` are each built for
        the same file pattern with ``validate=False``, and each must report
        exactly two display data items, in any order: ``compression`` set to
        ``'auto'`` and ``file_pattern`` set to the pattern passed in.
        """
        pattern = 'some_parquet_source'
        # Build both transforms up front, then run the same check on each.
        # NOTE(review): validate=False is presumably needed because the
        # pattern does not refer to a real file -- confirm.
        transforms = (
            ReadFromParquet(pattern, validate=False),
            ReadFromParquetBatched(pattern, validate=False),
        )

        wanted = [
            DisplayDataItemMatcher('compression', 'auto'),
            DisplayDataItemMatcher('file_pattern', pattern),
        ]

        for transform in transforms:
            actual_items = DisplayData.create_from(transform).items
            hc.assert_that(actual_items, hc.contains_inanyorder(*wanted))