Exemple #1
0
    def parquet_file(self):
        """
        Return the header read from `self.parquet_path`, or None.

        The value is memoized on `self._parquet_file`, so later calls reuse
        it instead of calling `parquet.read_header()` again. None means our
        cached DataFrame is "empty".
        """
        if hasattr(self, '_parquet_file'):
            # Already computed (possibly as None): reuse it.
            return self._parquet_file

        try:
            header = parquet.read_header(self.parquet_path)
        except (OSError, IndexError):
            # OSError -- two possibilities:
            #
            # 1. The file is missing.
            # 2. The file is empty. (We used to write empty files in
            #    assign_wf_module.)
            #
            # IndexError -- the file has a zero-length column list, and
            # fastparquet can't handle that. No columns means no rows.
            #
            # TODO nix the IndexError case when fastparquet resolves
            # https://github.com/dask/fastparquet/issues/361
            #
            # Either way, our cached DataFrame is "empty", and we represent
            # that as None.
            header = None

        self._parquet_file = header
        return self._parquet_file
Exemple #2
0
    def parquet_file(self):
        """
        Return the header read from `self.parquet_path`, or None.

        The value is memoized on `self._parquet_file`, so later calls reuse
        it instead of calling `parquet.read_header()` again. None means our
        cached DataFrame is "empty".
        """
        # TODO keep a handle on the file, to guarantee it doesn't disappear
        # from disk until after this CachedRenderResult is destroyed. Until
        # then, every read from self._parquet_file is a race.
        if hasattr(self, '_parquet_file'):
            # Already computed (possibly as None): reuse it.
            return self._parquet_file

        try:
            header = parquet.read_header(self.parquet_path)
        except OSError:
            # Two possibilities:
            #
            # 1. The file is missing.
            # 2. The file is empty. (We used to write empty files in
            #    assign_wf_module.)
            #
            # Either way, our cached DataFrame is "empty", and we represent
            # that as None.
            header = None
        except parquet.FastparquetCouldNotHandleFile:
            # Treat bugs as "empty file"
            header = None

        self._parquet_file = header
        return self._parquet_file
 def test_read_header_issue_361(self):
     """Check the header read for the fastparquet issue-361 fixture.

     The header is expected to report no columns and a count of 3.
     See https://github.com/dask/fastparquet/issues/361
     """
     fixture_name = 'fastparquet-issue-361.par'
     with self._file_on_s3(fixture_name):
         parsed = parquet.read_header(bucket, key)
         # Zero columns ...
         column_names = parsed.columns
         self.assertEqual(column_names, [])
         # ... and a count of 3.
         row_count = parsed.count
         self.assertEqual(row_count, 3)
 def test_read_header_issue_361(self):
     """Reading the issue-361 fixture's header raises FastparquetIssue361."""
     expected_error = parquet.FastparquetIssue361
     with self.assertRaises(expected_error):
         fixture_path = _path('fastparquet-issue-361.par')
         parquet.read_header(fixture_path)